finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
#ifndef _PASSAGEMARKER_H_
|
|
22
|
+
#define _PASSAGEMARKER_H_
|
|
23
|
+
|
|
24
|
+
struct passageList_st {
|
|
25
|
+
PassageMarkerI marker;
|
|
26
|
+
PassageMarkerList *next;
|
|
27
|
+
} ATTRIBUTE_PACKED;
|
|
28
|
+
|
|
29
|
+
///////////////////////////////////////////////////////////////////
|
|
30
|
+
// PassageMarker lists
|
|
31
|
+
///////////////////////////////////////////////////////////////////
|
|
32
|
+
// You can always malloc a PassageMarkerList but these routines manage the
|
|
33
|
+
// memory for you, thus avoiding fragmentation
|
|
34
|
+
PassageMarkerList *newPassageMarkerList(PassageMarkerI marker,
|
|
35
|
+
PassageMarkerList * next);
|
|
36
|
+
|
|
37
|
+
void deallocatePassageMarkerList(PassageMarkerList * list);
|
|
38
|
+
|
|
39
|
+
///////////////////////////////////////////////////////////////////
|
|
40
|
+
// Creators/Destructors
|
|
41
|
+
///////////////////////////////////////////////////////////////////
|
|
42
|
+
PassageMarkerI addPassageMarker(IDnum sequenceID, Coordinate start,
|
|
43
|
+
Node * node);
|
|
44
|
+
|
|
45
|
+
PassageMarkerI addUncertainPassageMarker(IDnum sequenceID, Node * node);
|
|
46
|
+
|
|
47
|
+
PassageMarkerI newPassageMarker(IDnum seqID, Coordinate start,
|
|
48
|
+
Coordinate finish, Coordinate startOffset,
|
|
49
|
+
Coordinate finishOffset);
|
|
50
|
+
|
|
51
|
+
// Deallocates but also removes all pointers towards that structure
|
|
52
|
+
void destroyPassageMarker(PassageMarkerI marker);
|
|
53
|
+
// Takes no arguments. Spelled with (void) so that this is a real prototype:
// with empty parentheses a C compiler does not check the argument list.
void destroyAllPassageMarkers(void);
|
|
54
|
+
|
|
55
|
+
///////////////////////////////////////////////////////////////////
|
|
56
|
+
// Node
|
|
57
|
+
///////////////////////////////////////////////////////////////////
|
|
58
|
+
|
|
59
|
+
// Current node
|
|
60
|
+
Node *getNode(PassageMarkerI marker);
|
|
61
|
+
|
|
62
|
+
// Yank out of current node
|
|
63
|
+
void extractPassageMarker(PassageMarkerI marker);
|
|
64
|
+
|
|
65
|
+
// Insert into a node
|
|
66
|
+
void transposePassageMarker(PassageMarkerI marker, Node * destination);
|
|
67
|
+
|
|
68
|
+
///////////////////////////////////////////////////////////////////
|
|
69
|
+
// General Info
|
|
70
|
+
///////////////////////////////////////////////////////////////////
|
|
71
|
+
// Export into file
|
|
72
|
+
void exportMarker(FILE * outfile, PassageMarkerI marker,
|
|
73
|
+
TightString * sequences, int wordLength);
|
|
74
|
+
|
|
75
|
+
// General info for debugging
|
|
76
|
+
char *readPassageMarker(PassageMarkerI marker);
|
|
77
|
+
|
|
78
|
+
// Sequence ID associated to the passage marker
|
|
79
|
+
IDnum getPassageMarkerSequenceID(PassageMarkerI marker);
|
|
80
|
+
IDnum getAbsolutePassMarkerSeqID(PassageMarkerI marker);
|
|
81
|
+
int passageMarkerDirection(PassageMarkerI marker);
|
|
82
|
+
|
|
83
|
+
// Coordinates
|
|
84
|
+
Coordinate getPassageMarkerStart(PassageMarkerI marker);
|
|
85
|
+
void setPassageMarkerStart(PassageMarkerI marker, Coordinate start);
|
|
86
|
+
Coordinate getPassageMarkerFinish(PassageMarkerI marker);
|
|
87
|
+
void setPassageMarkerFinish(PassageMarkerI marker, Coordinate finish);
|
|
88
|
+
Coordinate getPassageMarkerLength(PassageMarkerI marker);
|
|
89
|
+
|
|
90
|
+
// Offsets
|
|
91
|
+
Coordinate getStartOffset(PassageMarkerI marker);
|
|
92
|
+
void setStartOffset(PassageMarkerI marker, Coordinate offset);
|
|
93
|
+
void incrementStartOffset(PassageMarkerI marker, Coordinate offset);
|
|
94
|
+
Coordinate getFinishOffset(PassageMarkerI marker);
|
|
95
|
+
void setFinishOffset(PassageMarkerI marker, Coordinate offset);
|
|
96
|
+
void incrementFinishOffset(PassageMarkerI marker, Coordinate offset);
|
|
97
|
+
|
|
98
|
+
// Status
|
|
99
|
+
void setPassageMarkerStatus(PassageMarkerI marker, boolean status);
|
|
100
|
+
boolean getPassageMarkerStatus(PassageMarkerI marker);
|
|
101
|
+
|
|
102
|
+
///////////////////////////////////////////////////////////////////
|
|
103
|
+
// Marker Sequences
|
|
104
|
+
///////////////////////////////////////////////////////////////////
|
|
105
|
+
|
|
106
|
+
// Corresponding marker of reverse complement sequence
|
|
107
|
+
PassageMarkerI getTwinMarker(PassageMarkerI marker);
|
|
108
|
+
|
|
109
|
+
// Within a node
|
|
110
|
+
PassageMarkerI getNextInNode(PassageMarkerI marker);
|
|
111
|
+
void setNextInNode(PassageMarkerI marker, PassageMarkerI next);
|
|
112
|
+
void setTopOfTheNode(PassageMarkerI marker);
|
|
113
|
+
|
|
114
|
+
// Within a sequence
|
|
115
|
+
PassageMarkerI getNextInSequence(PassageMarkerI marker);
|
|
116
|
+
void setNextInSequence(PassageMarkerI previous, PassageMarkerI next);
|
|
117
|
+
PassageMarkerI getPreviousInSequence(PassageMarkerI marker);
|
|
118
|
+
void setPreviousInSequence(PassageMarkerI previous, PassageMarkerI next);
|
|
119
|
+
void connectPassageMarkers(PassageMarkerI previous, PassageMarkerI next,
|
|
120
|
+
Graph * graph);
|
|
121
|
+
|
|
122
|
+
// End of read chains
|
|
123
|
+
boolean isTerminal(PassageMarkerI marker);
|
|
124
|
+
boolean isInitial(PassageMarkerI marker);
|
|
125
|
+
|
|
126
|
+
// Checks whether the node of the next marker is the one given in parameter
|
|
127
|
+
boolean isDestinationToMarker(PassageMarkerI marker, Node * node);
|
|
128
|
+
|
|
129
|
+
// Bypasses the middle marker
|
|
130
|
+
void disconnectNextPassageMarker(PassageMarkerI marker, Graph * graph);
|
|
131
|
+
void deleteNextPassageMarker(PassageMarkerI marker, Graph * graph);
|
|
132
|
+
|
|
133
|
+
// Merge two markers (cf concatenateGraph())
|
|
134
|
+
void concatenatePassageMarkers(PassageMarkerI marker,
|
|
135
|
+
PassageMarkerI nextMarker);
|
|
136
|
+
|
|
137
|
+
#endif
|
|
@@ -0,0 +1,1717 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
#include <stdlib.h>
|
|
22
|
+
#include <stdio.h>
|
|
23
|
+
#include <string.h>
|
|
24
|
+
#include <ctype.h>
|
|
25
|
+
|
|
26
|
+
#ifdef _OPENMP
|
|
27
|
+
#include <omp.h>
|
|
28
|
+
#endif
|
|
29
|
+
|
|
30
|
+
#include "globals.h"
|
|
31
|
+
#include "allocArray.h"
|
|
32
|
+
#include "preGraph.h"
|
|
33
|
+
#include "recycleBin.h"
|
|
34
|
+
#include "tightString.h"
|
|
35
|
+
#include "run.h"
|
|
36
|
+
#include "utility.h"
|
|
37
|
+
|
|
38
|
+
#define ADENINE 0
|
|
39
|
+
#define CYTOSINE 1
|
|
40
|
+
#define GUANINE 2
|
|
41
|
+
#define THYMINE 3
|
|
42
|
+
|
|
43
|
+
struct preMarker_st {
|
|
44
|
+
PreMarker * previous;
|
|
45
|
+
PreMarker * next;
|
|
46
|
+
IDnum referenceStart;
|
|
47
|
+
IDnum preNodeStart;
|
|
48
|
+
IDnum length;
|
|
49
|
+
IDnum referenceID;
|
|
50
|
+
IDnum preNodeID; /* SF TODO only the sign seems to matter. Could replace with char or bit field */
|
|
51
|
+
} ATTRIBUTE_PACKED;
|
|
52
|
+
|
|
53
|
+
typedef struct preArc_st PreArc;
|
|
54
|
+
|
|
55
|
+
struct preArc_st {
|
|
56
|
+
PreArcI nextLeft; /* Index of the previous PreArc */
|
|
57
|
+
PreArcI nextRight; /* Index of the next PreArc */
|
|
58
|
+
IDnum multiplicity;
|
|
59
|
+
IDnum preNodeIDLeft;
|
|
60
|
+
IDnum preNodeIDRight;
|
|
61
|
+
} ATTRIBUTE_PACKED;
|
|
62
|
+
|
|
63
|
+
struct preNode_st {
|
|
64
|
+
PreArcI preArcLeft;
|
|
65
|
+
PreArcI preArcRight;
|
|
66
|
+
Descriptor *descriptor;
|
|
67
|
+
IDnum length;
|
|
68
|
+
} ATTRIBUTE_PACKED;
|
|
69
|
+
|
|
70
|
+
struct preGraph_st {
|
|
71
|
+
PreNode *preNodes;
|
|
72
|
+
IDnum * nodeReferenceMarkerCounts;
|
|
73
|
+
PreMarker ** nodeReferenceMarkers;
|
|
74
|
+
IDnum sequenceCount;
|
|
75
|
+
IDnum referenceCount;
|
|
76
|
+
IDnum preNodeCount;
|
|
77
|
+
int wordLength;
|
|
78
|
+
boolean double_strand;
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
static AllocArray *preArcMemory = NULL;
|
|
82
|
+
|
|
83
|
+
DECLARE_FAST_ACCESSORS(PREARC, PreArc, preArcMemory)
|
|
84
|
+
|
|
85
|
+
PreArcI allocatePreArc_pg()
|
|
86
|
+
{
|
|
87
|
+
#ifdef _OPENMP
|
|
88
|
+
return allocArrayArrayAllocate (preArcMemory);
|
|
89
|
+
#else
|
|
90
|
+
if (preArcMemory == NULL)
|
|
91
|
+
preArcMemory = newAllocArray(sizeof(PreArc), "PreArc");
|
|
92
|
+
return allocArrayAllocate (preArcMemory);
|
|
93
|
+
#endif
|
|
94
|
+
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Gives the PreArc behind the handle back to the preArcMemory pool.
void deallocatePreArc_pg(PreArcI preArc)
{
#ifndef _OPENMP
	allocArrayFree (preArcMemory, preArc);
#else
	allocArrayArrayFree (preArcMemory, preArc);
#endif
}
|
|
105
|
+
|
|
106
|
+
// Returns the length of the preNode's descriptor list
|
|
107
|
+
Coordinate getPreNodeLength_pg(IDnum preNodeID, PreGraph * preGraph)
|
|
108
|
+
{
|
|
109
|
+
IDnum ID = preNodeID;
|
|
110
|
+
|
|
111
|
+
if (ID < 0)
|
|
112
|
+
ID = -ID;
|
|
113
|
+
|
|
114
|
+
return (preGraph->preNodes[ID]).length;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Returns the number of preNodes in the preGraph
|
|
118
|
+
IDnum preNodeCount_pg(PreGraph * preGraph)
|
|
119
|
+
{
|
|
120
|
+
return preGraph->preNodeCount;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// returns the number of sequences used to buid the preGraph
|
|
124
|
+
IDnum sequenceCount_pg(PreGraph * preGraph)
|
|
125
|
+
{
|
|
126
|
+
return preGraph->sequenceCount;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
PreArcI getPreArcBetweenPreNodes_pg(IDnum originPreNodeID,
|
|
130
|
+
IDnum destinationPreNodeID,
|
|
131
|
+
PreGraph * preGraph)
|
|
132
|
+
{
|
|
133
|
+
PreArcI preArc;
|
|
134
|
+
|
|
135
|
+
if (originPreNodeID == 0 || destinationPreNodeID == 0) {
|
|
136
|
+
return NULL_IDX;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
for (preArc = getPreArc_pg(originPreNodeID, preGraph);
|
|
140
|
+
preArc != NULL_IDX;
|
|
141
|
+
preArc = getNextPreArc_pg(preArc, originPreNodeID)) {
|
|
142
|
+
if (getDestination_pg(preArc, originPreNodeID) ==
|
|
143
|
+
destinationPreNodeID) {
|
|
144
|
+
return preArc;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
return NULL_IDX;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
static void addPreArcToPreNode_pg(PreArcI preArc, IDnum preNodeID,
|
|
152
|
+
PreGraph * preGraph)
|
|
153
|
+
{
|
|
154
|
+
IDnum ID = preNodeID;
|
|
155
|
+
PreNode *preNode;
|
|
156
|
+
PreArcI *preArcPtr;
|
|
157
|
+
PreArc *preArcVal;
|
|
158
|
+
|
|
159
|
+
if (ID < 0)
|
|
160
|
+
ID = -ID;
|
|
161
|
+
|
|
162
|
+
preNode = &(preGraph->preNodes[ID]);
|
|
163
|
+
|
|
164
|
+
if (preNodeID > 0)
|
|
165
|
+
preArcPtr = &(preNode->preArcRight);
|
|
166
|
+
else
|
|
167
|
+
preArcPtr = &(preNode->preArcLeft);
|
|
168
|
+
|
|
169
|
+
preArcVal = PREARC_I2P (preArc);
|
|
170
|
+
preArcVal = PREARC_I2P (preArc);
|
|
171
|
+
|
|
172
|
+
if (preNodeID == preArcVal->preNodeIDLeft) {
|
|
173
|
+
preArcVal->nextLeft = *preArcPtr;
|
|
174
|
+
*preArcPtr = preArc;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
if (preNodeID == preArcVal->preNodeIDRight) {
|
|
178
|
+
preArcVal->nextRight = *preArcPtr;
|
|
179
|
+
*preArcPtr = preArc;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// Creates an preArc from preNode origin to preNode destination.
|
|
184
|
+
// If this preArc already exists, increments its multiplicity by 1.
|
|
185
|
+
PreArcI createPreArc_pg(IDnum originPreNodeID, IDnum destinationPreNodeID,
|
|
186
|
+
PreGraph * preGraph)
|
|
187
|
+
{
|
|
188
|
+
PreArcI preArc;
|
|
189
|
+
PreArc *preArcVal;
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
if (originPreNodeID == 0 || destinationPreNodeID == 0)
|
|
193
|
+
return NULL_IDX;
|
|
194
|
+
|
|
195
|
+
preArc =
|
|
196
|
+
getPreArcBetweenPreNodes_pg(originPreNodeID,
|
|
197
|
+
destinationPreNodeID, preGraph);
|
|
198
|
+
|
|
199
|
+
if (preArc != NULL_IDX) {
|
|
200
|
+
PREARC_FI2P (preArc)->multiplicity++;
|
|
201
|
+
if (destinationPreNodeID == -originPreNodeID)
|
|
202
|
+
PREARC_FI2P (preArc)->multiplicity++;
|
|
203
|
+
return preArc;
|
|
204
|
+
}
|
|
205
|
+
// If not found
|
|
206
|
+
preArc = allocatePreArc_pg();
|
|
207
|
+
preArcVal = PREARC_FI2P (preArc);
|
|
208
|
+
preArcVal->preNodeIDLeft = originPreNodeID;
|
|
209
|
+
preArcVal->preNodeIDRight = -destinationPreNodeID;
|
|
210
|
+
preArcVal->multiplicity = 1;
|
|
211
|
+
|
|
212
|
+
addPreArcToPreNode_pg(preArc, originPreNodeID, preGraph);
|
|
213
|
+
|
|
214
|
+
// Hairpin case
|
|
215
|
+
if (destinationPreNodeID == -originPreNodeID) {
|
|
216
|
+
preArcVal->multiplicity++;
|
|
217
|
+
return preArc;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
addPreArcToPreNode_pg(preArc, -destinationPreNodeID, preGraph);
|
|
221
|
+
|
|
222
|
+
return preArc;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
void createAnalogousPreArc_pg(IDnum originPreNodeID,
|
|
226
|
+
IDnum destinationPreNodeID,
|
|
227
|
+
PreArcI refPreArc, PreGraph * preGraph)
|
|
228
|
+
{
|
|
229
|
+
PreArcI preArc;
|
|
230
|
+
PreArc *preArcVal;
|
|
231
|
+
|
|
232
|
+
if (originPreNodeID == 0 || destinationPreNodeID == 0)
|
|
233
|
+
return;
|
|
234
|
+
|
|
235
|
+
preArc =
|
|
236
|
+
getPreArcBetweenPreNodes_pg(originPreNodeID,
|
|
237
|
+
destinationPreNodeID, preGraph);
|
|
238
|
+
|
|
239
|
+
if (preArc != NULL_IDX) {
|
|
240
|
+
PREARC_FI2P (preArc)->multiplicity += PREARC_FI2P (refPreArc)->multiplicity;
|
|
241
|
+
return;
|
|
242
|
+
}
|
|
243
|
+
// If not found
|
|
244
|
+
preArc = allocatePreArc_pg();
|
|
245
|
+
preArcVal = PREARC_FI2P (preArc);
|
|
246
|
+
preArcVal->preNodeIDLeft = originPreNodeID;
|
|
247
|
+
preArcVal->preNodeIDRight = -destinationPreNodeID;
|
|
248
|
+
preArcVal->multiplicity = PREARC_FI2P (refPreArc)->multiplicity;
|
|
249
|
+
|
|
250
|
+
addPreArcToPreNode_pg(preArc, originPreNodeID, preGraph);
|
|
251
|
+
|
|
252
|
+
// Hairpin case
|
|
253
|
+
if (destinationPreNodeID == -originPreNodeID)
|
|
254
|
+
return;
|
|
255
|
+
|
|
256
|
+
addPreArcToPreNode_pg(preArc, -destinationPreNodeID, preGraph);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Overwrites the successor of preArc as seen from preNodeID: nextLeft when
// preNodeID is the arc's left end, nextRight when it is the right end (both
// when the two ends are equal, none when neither matches).
static void setNextPreArc_pg(PreArcI preArc, IDnum preNodeID,
			     PreArcI nextPreArc)
{
	PreArc *arc = PREARC_FI2P (preArc);
	const int isLeftEnd = (arc->preNodeIDLeft == preNodeID);
	const int isRightEnd = (arc->preNodeIDRight == preNodeID);

	if (isLeftEnd)
		arc->nextLeft = nextPreArc;
	if (isRightEnd)
		arc->nextRight = nextPreArc;
}
|
|
270
|
+
|
|
271
|
+
void removePreArcFromList_pg(PreArcI preArc, IDnum preNodeID,
|
|
272
|
+
PreGraph * preGraph)
|
|
273
|
+
{
|
|
274
|
+
IDnum ID = preNodeID;
|
|
275
|
+
PreNode *preNode;
|
|
276
|
+
PreArcI *preArcPtr;
|
|
277
|
+
PreArcI tempPreArc;
|
|
278
|
+
|
|
279
|
+
if (ID < 0)
|
|
280
|
+
ID = -ID;
|
|
281
|
+
|
|
282
|
+
preNode = &(preGraph->preNodes[ID]);
|
|
283
|
+
|
|
284
|
+
if (preNodeID > 0)
|
|
285
|
+
preArcPtr = &(preNode->preArcRight);
|
|
286
|
+
else
|
|
287
|
+
preArcPtr = &(preNode->preArcLeft);
|
|
288
|
+
|
|
289
|
+
if (*preArcPtr == preArc) {
|
|
290
|
+
*preArcPtr = getNextPreArc_pg(preArc, preNodeID);
|
|
291
|
+
return;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
for (tempPreArc = *preArcPtr; tempPreArc != NULL_IDX;
|
|
295
|
+
tempPreArc = getNextPreArc_pg(tempPreArc, preNodeID))
|
|
296
|
+
if (getNextPreArc_pg(tempPreArc, preNodeID) == preArc)
|
|
297
|
+
setNextPreArc_pg(tempPreArc, preNodeID,
|
|
298
|
+
getNextPreArc_pg(preArc,
|
|
299
|
+
preNodeID));
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
// Removes preArc from the lists of both of its ends and returns it to the
// pool. A NULL_IDX handle is ignored.
void destroyPreArc_pg(PreArcI preArc, PreGraph * preGraph)
{
	PreArc *arc;
	IDnum firstEnd, secondEnd;

	if (preArc == NULL_IDX)
		return;

	arc = PREARC_FI2P (preArc);
	firstEnd = arc->preNodeIDLeft;
	secondEnd = arc->preNodeIDRight;

	removePreArcFromList_pg(preArc, firstEnd, preGraph);

	// Both ends equal: the arc sits in a single list, already handled
	if (secondEnd != firstEnd)
		removePreArcFromList_pg(preArc, secondEnd, preGraph);

	deallocatePreArc_pg(preArc);
}
|
|
323
|
+
|
|
324
|
+
// Tears down one preNode (the sign of preNodeID is ignored): destroys all of
// its arcs, unhooks and frees its marker records, frees its descriptor and
// leaves descriptor == NULL as the "destroyed" flag.
void destroyPreNode_pg(IDnum preNodeID, PreGraph * preGraph)
{
	const IDnum absID = (preNodeID < 0) ? -preNodeID : preNodeID;
	PreNode *doomed = &(preGraph->preNodes[absID]);
	IDnum slot;

	// Arcs: destroying the head arc moves the head on, until the list is empty
	while (doomed->preArcLeft != NULL_IDX)
		destroyPreArc_pg(doomed->preArcLeft, preGraph);
	while (doomed->preArcRight != NULL_IDX)
		destroyPreArc_pg(doomed->preArcRight, preGraph);

	// Marker records, if the graph carries any
	if (preGraph->nodeReferenceMarkers) {
		PreMarker *records = preGraph->nodeReferenceMarkers[absID];

		for (slot = 0;
		     slot < preGraph->nodeReferenceMarkerCounts[absID];
		     slot++) {
			PreMarker *record = &(records[slot]);

			// Detach from both neighbours before blanking the record
			if (record->previous != NULL)
				record->previous->next = NULL;
			if (record->next != NULL)
				record->next->previous = NULL;
			record->preNodeID = 0;
			record->referenceID = 0;
		}

		free(records);
		preGraph->nodeReferenceMarkers[absID] = NULL;
		preGraph->nodeReferenceMarkerCounts[absID] = 0;
	}

	// Sequence
	free(doomed->descriptor);
	doomed->descriptor = NULL;
}
|
|
367
|
+
|
|
368
|
+
// Releases everything owned by the preGraph, then the preGraph itself.
//
// preGraph: graph to destroy; must not be used afterwards.
//
// Also destroys the file-level PreArc pool and resets its pointer, so that a
// preGraph built later in the same process starts from a fresh pool instead
// of one that was already destroyed.
void destroyPreGraph_pg(PreGraph * preGraph)
{
	IDnum index;
	PreNode *preNode = &(preGraph->preNodes[1]);

	// Descriptors (already NULL for preNodes that were destroyed earlier)
	for (index = 1; index <= preGraph->preNodeCount; index++) {
		free(preNode->descriptor);
		preNode++;
	}

	// Arcs
#ifdef _OPENMP
	destroyAllocArrayArray(preArcMemory);
#else
	destroyAllocArray(preArcMemory);
#endif
	// allocatePreArc_pg() creates the pool only while this pointer is NULL
	preArcMemory = NULL;

	// Nodes
	free(preGraph->preNodes);

	// PreMarkers
	if (preGraph->nodeReferenceMarkerCounts) {
		// Each per-node array is its own allocation (destroyPreNode_pg()
		// frees them one by one and NULLs the slot), so release the ones
		// that are still alive before dropping the table.
		// NOTE(review): assumes the table has preNodeCount + 1 entries,
		// like preNodes - confirm against the allocation site
		if (preGraph->nodeReferenceMarkers) {
			for (index = 1; index <= preGraph->preNodeCount; index++)
				free(preGraph->nodeReferenceMarkers[index]);
		}
		free(preGraph->nodeReferenceMarkerCounts);
		free(preGraph->nodeReferenceMarkers);
	}

	// Graph
	free(preGraph);
}
|
|
399
|
+
|
|
400
|
+
static Nucleotide getNucleotideInDescriptor_pg(Descriptor * descriptor,
|
|
401
|
+
Coordinate i)
|
|
402
|
+
{
|
|
403
|
+
Descriptor *fourMer = descriptor + i / 4;
|
|
404
|
+
|
|
405
|
+
switch (i % 4) {
|
|
406
|
+
case 0:
|
|
407
|
+
return (*fourMer & 3);
|
|
408
|
+
case 1:
|
|
409
|
+
return (*fourMer & 12) >> 2;
|
|
410
|
+
case 2:
|
|
411
|
+
return (*fourMer & 48) >> 4;
|
|
412
|
+
case 3:
|
|
413
|
+
return (*fourMer & 192) >> 6;
|
|
414
|
+
}
|
|
415
|
+
return 0;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
// Looks up a preNode by strictly positive ID. Returns NULL for a preNode
// that has been destroyed (descriptor == NULL). Aborts the process when
// preNodeID is zero or negative.
PreNode *getPreNodeInPreGraph_pg(PreGraph * preGraph, IDnum preNodeID)
{
	PreNode *candidate;

	if (preNodeID <= 0)
		abort();

	candidate = &(preGraph->preNodes[preNodeID]);

	return (candidate->descriptor != NULL) ? candidate : NULL;
}
|
|
432
|
+
|
|
433
|
+
// First arc of a preNode's list: the right list for a positive ID, the left
// list otherwise. The preNode is selected by the absolute value of the ID.
PreArcI getPreArc_pg(IDnum preNodeID, PreGraph * preGraph)
{
	const IDnum absID = (preNodeID < 0) ? -preNodeID : preNodeID;
	const PreNode *owner = &(preGraph->preNodes[absID]);

	return (preNodeID > 0) ? owner->preArcRight : owner->preArcLeft;
}
|
|
448
|
+
|
|
449
|
+
// Successor of preArc in the list of preNodeID: nextLeft when preNodeID is
// the arc's left end, nextRight in every other case.
// preArc is dereferenced without a NULL_IDX check.
PreArcI getNextPreArc_pg(PreArcI preArc, IDnum preNodeID)
{
	const PreArc *arc = PREARC_FI2P (preArc);

	return (preNodeID == arc->preNodeIDLeft) ? arc->nextLeft
						 : arc->nextRight;
}
|
|
461
|
+
|
|
462
|
+
// Multiplicity of an arc; a NULL_IDX handle counts as 0.
IDnum getMultiplicity_pg(PreArcI preArc)
{
	return (preArc == NULL_IDX) ? 0 : PREARC_FI2P (preArc)->multiplicity;
}
|
|
469
|
+
|
|
470
|
+
// Returns the stored ID of the end of preArc opposite to preNodeID, exactly
// as recorded in the arc (no sign change). Any preNodeID other than the left
// end yields the left end.
IDnum getOtherEnd_pg(PreArcI preArc, IDnum preNodeID)
{
	const PreArc *arc = PREARC_FI2P (preArc);

	return (preNodeID == arc->preNodeIDLeft) ? arc->preNodeIDRight
						 : arc->preNodeIDLeft;
}
|
|
480
|
+
|
|
481
|
+
// Destination reached through preArc when leaving preNodeID: the negated
// stored ID of the opposite end. Returns 0 for a NULL_IDX handle.
IDnum getDestination_pg(PreArcI preArc, IDnum preNodeID)
{
	const PreArc *arc;

	if (preArc == NULL_IDX)
		return 0;

	arc = PREARC_FI2P (preArc);

	return (preNodeID == arc->preNodeIDLeft) ? -arc->preNodeIDRight
						 : -arc->preNodeIDLeft;
}
|
|
495
|
+
|
|
496
|
+
static void writeNucleotideInDescriptor_pg(Nucleotide nucleotide,
|
|
497
|
+
Descriptor * descriptor,
|
|
498
|
+
Coordinate i)
|
|
499
|
+
{
|
|
500
|
+
Descriptor *fourMer = descriptor + i / 4;
|
|
501
|
+
switch (i % 4) {
|
|
502
|
+
case 3:
|
|
503
|
+
*fourMer &= 63;
|
|
504
|
+
*fourMer += nucleotide << 6;
|
|
505
|
+
return;
|
|
506
|
+
case 2:
|
|
507
|
+
*fourMer &= 207;
|
|
508
|
+
*fourMer += nucleotide << 4;
|
|
509
|
+
return;
|
|
510
|
+
case 1:
|
|
511
|
+
*fourMer &= 243;
|
|
512
|
+
*fourMer += nucleotide << 2;
|
|
513
|
+
return;
|
|
514
|
+
case 0:
|
|
515
|
+
*fourMer &= 252;
|
|
516
|
+
*fourMer += nucleotide;
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
static inline Descriptor *mergeDescriptors_pg(Descriptor * descr,
|
|
521
|
+
Coordinate destinationLength,
|
|
522
|
+
Descriptor * copy,
|
|
523
|
+
Coordinate sourceLength,
|
|
524
|
+
int wordLength)
|
|
525
|
+
{
|
|
526
|
+
Descriptor *readPtr, *writePtr;
|
|
527
|
+
Descriptor readCopy = 0;
|
|
528
|
+
int readOffset, writeOffset;
|
|
529
|
+
size_t arrayLength;
|
|
530
|
+
Coordinate newLength =
|
|
531
|
+
destinationLength + sourceLength + wordLength - 1;
|
|
532
|
+
Descriptor *new;
|
|
533
|
+
Coordinate index;
|
|
534
|
+
|
|
535
|
+
// Specify new array
|
|
536
|
+
arrayLength = newLength / 4;
|
|
537
|
+
if (newLength % 4)
|
|
538
|
+
arrayLength++;
|
|
539
|
+
new = callocOrExit(arrayLength, Descriptor);
|
|
540
|
+
for (index = 0; index < arrayLength; index++)
|
|
541
|
+
new[index] = 0;
|
|
542
|
+
|
|
543
|
+
// Copying first descriptor
|
|
544
|
+
readPtr = descr;
|
|
545
|
+
writePtr = new;
|
|
546
|
+
writeOffset = 0;
|
|
547
|
+
for (index = 0; index < destinationLength + wordLength - 1;
|
|
548
|
+
index++) {
|
|
549
|
+
(*writePtr) >>= 2;
|
|
550
|
+
if (writeOffset == 0)
|
|
551
|
+
readCopy = *readPtr;
|
|
552
|
+
(*writePtr) += (readCopy & 3) << 6;
|
|
553
|
+
|
|
554
|
+
/*switch ((readCopy & 3)) {
|
|
555
|
+
case ADENINE:
|
|
556
|
+
velvetLog("A%ld", index);
|
|
557
|
+
break;
|
|
558
|
+
case CYTOSINE:
|
|
559
|
+
velvetLog("C%ld", index);
|
|
560
|
+
break;
|
|
561
|
+
case GUANINE:
|
|
562
|
+
velvetLog("G%ld", index);
|
|
563
|
+
break;
|
|
564
|
+
case THYMINE:
|
|
565
|
+
velvetLog("T%ld", index);
|
|
566
|
+
break;
|
|
567
|
+
} */
|
|
568
|
+
readCopy >>= 2;
|
|
569
|
+
|
|
570
|
+
writeOffset++;
|
|
571
|
+
if (writeOffset == 4) {
|
|
572
|
+
writePtr++;
|
|
573
|
+
readPtr++;
|
|
574
|
+
writeOffset = 0;
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
//velvetLog("\n");
|
|
579
|
+
|
|
580
|
+
// Skipping initial k-1 letters in second descriptor
|
|
581
|
+
readPtr = &(copy[(wordLength - 1) / 4]);
|
|
582
|
+
readCopy = *readPtr;
|
|
583
|
+
readOffset = (wordLength - 1) % 4;
|
|
584
|
+
readCopy >>= (readOffset * 2);
|
|
585
|
+
|
|
586
|
+
// Going on copying second descriptor
|
|
587
|
+
for (index = 0; index < sourceLength; index++) {
|
|
588
|
+
(*writePtr) >>= 2;
|
|
589
|
+
if (readOffset == 0)
|
|
590
|
+
readCopy = *readPtr;
|
|
591
|
+
(*writePtr) += (readCopy & 3) << 6;
|
|
592
|
+
/*switch ((readCopy & 3)) {
|
|
593
|
+
case ADENINE:
|
|
594
|
+
velvetLog("A%ld", index);
|
|
595
|
+
break;
|
|
596
|
+
case CYTOSINE:
|
|
597
|
+
velvetLog("C%ld", index);
|
|
598
|
+
break;
|
|
599
|
+
case GUANINE:
|
|
600
|
+
velvetLog("G%ld", index);
|
|
601
|
+
break;
|
|
602
|
+
case THYMINE:
|
|
603
|
+
velvetLog("T%ld", index);
|
|
604
|
+
break;
|
|
605
|
+
default:
|
|
606
|
+
velvetLog("?%ld;", index);
|
|
607
|
+
} */
|
|
608
|
+
readCopy >>= 2;
|
|
609
|
+
|
|
610
|
+
writeOffset++;
|
|
611
|
+
if (writeOffset == 4) {
|
|
612
|
+
writePtr++;
|
|
613
|
+
writeOffset = 0;
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
readOffset++;
|
|
617
|
+
if (readOffset == 4) {
|
|
618
|
+
readPtr++;
|
|
619
|
+
readOffset = 0;
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
//velvetLog("\n");
|
|
624
|
+
|
|
625
|
+
if (writeOffset != 0) {
|
|
626
|
+
while (writeOffset != 4) {
|
|
627
|
+
(*writePtr) >>= 2;
|
|
628
|
+
writeOffset++;
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
return new;
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
static inline Descriptor *mergeDescriptorsH2H_pg(Descriptor * descr,
|
|
636
|
+
Coordinate
|
|
637
|
+
destinationLength,
|
|
638
|
+
Descriptor * copy,
|
|
639
|
+
Coordinate sourceLength,
|
|
640
|
+
int wordLength)
|
|
641
|
+
{
|
|
642
|
+
Descriptor *readPtr, *writePtr;
|
|
643
|
+
Descriptor readCopy;
|
|
644
|
+
int readOffset, writeOffset;
|
|
645
|
+
size_t arrayLength;
|
|
646
|
+
Coordinate newLength =
|
|
647
|
+
destinationLength + sourceLength + wordLength - 1;
|
|
648
|
+
Descriptor *new;
|
|
649
|
+
Coordinate index;
|
|
650
|
+
|
|
651
|
+
// Specify new array
|
|
652
|
+
arrayLength = newLength / 4;
|
|
653
|
+
if (newLength % 4)
|
|
654
|
+
arrayLength++;
|
|
655
|
+
new = callocOrExit(arrayLength, Descriptor);
|
|
656
|
+
for (index = 0; index < arrayLength; index++)
|
|
657
|
+
new[index] = 0;
|
|
658
|
+
|
|
659
|
+
// Copying first descriptor (including final (k-1)-mer)
|
|
660
|
+
readPtr = descr;
|
|
661
|
+
readCopy = *readPtr;
|
|
662
|
+
writePtr = new;
|
|
663
|
+
writeOffset = 0;
|
|
664
|
+
readOffset = 0;
|
|
665
|
+
for (index = 0; index < destinationLength + wordLength - 1;
|
|
666
|
+
index++) {
|
|
667
|
+
(*writePtr) >>= 2;
|
|
668
|
+
if (writeOffset == 0)
|
|
669
|
+
readCopy = *readPtr;
|
|
670
|
+
(*writePtr) += (readCopy & 3) << 6;
|
|
671
|
+
/*switch ((readCopy & 3)) {
|
|
672
|
+
case ADENINE:
|
|
673
|
+
velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
|
|
674
|
+
break;
|
|
675
|
+
case CYTOSINE:
|
|
676
|
+
velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
|
|
677
|
+
break;
|
|
678
|
+
case GUANINE:
|
|
679
|
+
velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
|
|
680
|
+
break;
|
|
681
|
+
case THYMINE:
|
|
682
|
+
velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
|
|
683
|
+
break;
|
|
684
|
+
default:
|
|
685
|
+
velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
|
|
686
|
+
} */
|
|
687
|
+
readCopy >>= 2;
|
|
688
|
+
|
|
689
|
+
writeOffset++;
|
|
690
|
+
if (writeOffset == 4) {
|
|
691
|
+
writePtr++;
|
|
692
|
+
readPtr++;
|
|
693
|
+
writeOffset = 0;
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
//velvetLog("\n");
|
|
698
|
+
|
|
699
|
+
// Going to end of second descriptor
|
|
700
|
+
readPtr = &(copy[(sourceLength - 1) / 4]);
|
|
701
|
+
readCopy = *readPtr;
|
|
702
|
+
readOffset = (sourceLength - 1) % 4;
|
|
703
|
+
readCopy <<= ((3 - readOffset) * 2);
|
|
704
|
+
|
|
705
|
+
//velvetLog("Read copy %x\n", readCopy);
|
|
706
|
+
|
|
707
|
+
// Going on copying reverse complement of second descriptor
|
|
708
|
+
for (index = 0; index < sourceLength; index++) {
|
|
709
|
+
(*writePtr) >>= 2;
|
|
710
|
+
if (readOffset == 3)
|
|
711
|
+
readCopy = *readPtr;
|
|
712
|
+
#ifndef COLOR
|
|
713
|
+
(*writePtr) += 192 - (readCopy & 192);
|
|
714
|
+
#else
|
|
715
|
+
(*writePtr) += (readCopy & 192);
|
|
716
|
+
#endif
|
|
717
|
+
/*switch (3 - ((readCopy & 192) >> 6)) {
|
|
718
|
+
case ADENINE:
|
|
719
|
+
velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
|
|
720
|
+
break;
|
|
721
|
+
case CYTOSINE:
|
|
722
|
+
velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
|
|
723
|
+
break;
|
|
724
|
+
case GUANINE:
|
|
725
|
+
velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
|
|
726
|
+
break;
|
|
727
|
+
case THYMINE:
|
|
728
|
+
velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
|
|
729
|
+
break;
|
|
730
|
+
default:
|
|
731
|
+
velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
|
|
732
|
+
} */
|
|
733
|
+
readCopy <<= 2;
|
|
734
|
+
|
|
735
|
+
writeOffset++;
|
|
736
|
+
if (writeOffset == 4) {
|
|
737
|
+
writePtr++;
|
|
738
|
+
writeOffset = 0;
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
readOffset--;
|
|
742
|
+
if (readOffset == -1) {
|
|
743
|
+
readPtr--;
|
|
744
|
+
readOffset = 3;
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
//velvetLog("\n");
|
|
749
|
+
|
|
750
|
+
if (writeOffset != 0) {
|
|
751
|
+
while (writeOffset != 4) {
|
|
752
|
+
(*writePtr) >>= 2;
|
|
753
|
+
writeOffset++;
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
return new;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
static inline Descriptor *mergeDescriptorsF2F_pg(Descriptor * descr,
|
|
761
|
+
Coordinate
|
|
762
|
+
destinationLength,
|
|
763
|
+
Descriptor * copy,
|
|
764
|
+
Coordinate sourceLength,
|
|
765
|
+
int wordLength)
|
|
766
|
+
{
|
|
767
|
+
Descriptor *readPtr, *writePtr;
|
|
768
|
+
Descriptor readCopy;
|
|
769
|
+
int readOffset, writeOffset;
|
|
770
|
+
size_t arrayLength;
|
|
771
|
+
Coordinate newLength =
|
|
772
|
+
destinationLength + sourceLength + wordLength - 1;
|
|
773
|
+
Descriptor *new;
|
|
774
|
+
Coordinate index;
|
|
775
|
+
|
|
776
|
+
// Specify new array
|
|
777
|
+
arrayLength = newLength / 4;
|
|
778
|
+
if (newLength % 4)
|
|
779
|
+
arrayLength++;
|
|
780
|
+
new = callocOrExit(arrayLength, Descriptor);
|
|
781
|
+
for (index = 0; index < arrayLength; index++)
|
|
782
|
+
new[index] = 0;
|
|
783
|
+
|
|
784
|
+
writePtr = new;
|
|
785
|
+
writeOffset = 0;
|
|
786
|
+
|
|
787
|
+
// Going to end of first descriptor
|
|
788
|
+
readPtr = &(copy[(sourceLength + wordLength - 2) / 4]);
|
|
789
|
+
readCopy = *readPtr;
|
|
790
|
+
readOffset = (sourceLength + wordLength - 2) % 4;
|
|
791
|
+
readCopy <<= ((3 - readOffset) * 2);
|
|
792
|
+
|
|
793
|
+
// Copying reverse complement of first descriptor (minus final (k-1)-mer)
|
|
794
|
+
for (index = 0; index < sourceLength; index++) {
|
|
795
|
+
(*writePtr) >>= 2;
|
|
796
|
+
if (readOffset == 3)
|
|
797
|
+
readCopy = *readPtr;
|
|
798
|
+
#ifndef COLOR
|
|
799
|
+
(*writePtr) += 192 - (readCopy & 192);
|
|
800
|
+
#else
|
|
801
|
+
(*writePtr) += (readCopy & 192);
|
|
802
|
+
#endif
|
|
803
|
+
/*switch (3 - ((readCopy & 192) >> 6)) {
|
|
804
|
+
case ADENINE:
|
|
805
|
+
velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
|
|
806
|
+
break;
|
|
807
|
+
case CYTOSINE:
|
|
808
|
+
velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
|
|
809
|
+
break;
|
|
810
|
+
case GUANINE:
|
|
811
|
+
velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
|
|
812
|
+
break;
|
|
813
|
+
case THYMINE:
|
|
814
|
+
velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
|
|
815
|
+
break;
|
|
816
|
+
default:
|
|
817
|
+
velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
|
|
818
|
+
} */
|
|
819
|
+
readCopy <<= 2;
|
|
820
|
+
|
|
821
|
+
writeOffset++;
|
|
822
|
+
if (writeOffset == 4) {
|
|
823
|
+
writePtr++;
|
|
824
|
+
writeOffset = 0;
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
readOffset--;
|
|
828
|
+
if (readOffset == -1) {
|
|
829
|
+
readPtr--;
|
|
830
|
+
readOffset = 3;
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
//velvetLog("\n");
|
|
835
|
+
|
|
836
|
+
// Going on copying second descriptor
|
|
837
|
+
readPtr = descr;
|
|
838
|
+
readCopy = *readPtr;
|
|
839
|
+
readOffset = 0;
|
|
840
|
+
|
|
841
|
+
for (index = 0; index < destinationLength + wordLength - 1;
|
|
842
|
+
index++) {
|
|
843
|
+
(*writePtr) >>= 2;
|
|
844
|
+
if (readOffset == 0)
|
|
845
|
+
readCopy = *readPtr;
|
|
846
|
+
(*writePtr) += (readCopy & 3) << 6;
|
|
847
|
+
/*switch ((readCopy & 3)) {
|
|
848
|
+
case ADENINE:
|
|
849
|
+
velvetLog("A(%ld %i %i) ", index, writeOffset, readOffset);
|
|
850
|
+
break;
|
|
851
|
+
case CYTOSINE:
|
|
852
|
+
velvetLog("C(%ld %i %i) ", index, writeOffset, readOffset);
|
|
853
|
+
break;
|
|
854
|
+
case GUANINE:
|
|
855
|
+
velvetLog("G(%ld %i %i) ", index, writeOffset, readOffset);
|
|
856
|
+
break;
|
|
857
|
+
case THYMINE:
|
|
858
|
+
velvetLog("T(%ld %i %i) ", index, writeOffset, readOffset);
|
|
859
|
+
break;
|
|
860
|
+
default:
|
|
861
|
+
velvetLog("?(%ld %i %i);", index, writeOffset, readOffset);
|
|
862
|
+
} */
|
|
863
|
+
readCopy >>= 2;
|
|
864
|
+
|
|
865
|
+
writeOffset++;
|
|
866
|
+
if (writeOffset == 4) {
|
|
867
|
+
writePtr++;
|
|
868
|
+
writeOffset = 0;
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
readOffset++;
|
|
872
|
+
if (readOffset == 4) {
|
|
873
|
+
readPtr++;
|
|
874
|
+
readOffset = 0;
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
|
|
878
|
+
//velvetLog("\n");
|
|
879
|
+
|
|
880
|
+
if (writeOffset != 0) {
|
|
881
|
+
while (writeOffset != 4) {
|
|
882
|
+
(*writePtr) >>= 2;
|
|
883
|
+
writeOffset++;
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
return new;
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
// Overwrites the multiplicity stored in the preArc designated by `preArc`.
void setMultiplicity_pg(PreArcI preArc, IDnum mult)
{
	PreArc *arcRecord = PREARC_FI2P (preArc);

	arcRecord->multiplicity = mult;
}
|
|
894
|
+
|
|
895
|
+
// Replaces every endpoint of `preArc` equal to oldPreNodeID with
// newPreNodeID. Both endpoints are tested independently, so an arc whose two
// ends carry the old ID has both rewritten.
static void updatePreArcData_pg(PreArcI preArc, IDnum oldPreNodeID,
				IDnum newPreNodeID)
{
	PreArc *arc = PREARC_FI2P (preArc);

	if (arc->preNodeIDLeft == oldPreNodeID) {
		arc->preNodeIDLeft = newPreNodeID;
	}

	if (arc->preNodeIDRight == oldPreNodeID) {
		arc->preNodeIDRight = newPreNodeID;
	}
}
|
|
906
|
+
|
|
907
|
+
// Reshuffles the preGraph->preNodes array to remove NULL pointers
|
|
908
|
+
// Beware that preNode IDs are accordingly reshuffled (all pointers remain valid though)
|
|
909
|
+
void renumberPreNodes_pg(PreGraph * preGraph)
|
|
910
|
+
{
|
|
911
|
+
IDnum preNodeIndex;
|
|
912
|
+
PreNode *currentPreNode, *destinationPreNode;
|
|
913
|
+
IDnum counter = 0;
|
|
914
|
+
IDnum preNodes = preGraph->preNodeCount;
|
|
915
|
+
IDnum newIndex;
|
|
916
|
+
IDnum preMarkerIndex;
|
|
917
|
+
PreMarker * preMarker;
|
|
918
|
+
PreArcI preArc;
|
|
919
|
+
|
|
920
|
+
velvetLog("Renumbering preNodes\n");
|
|
921
|
+
velvetLog("Initial preNode count %li\n", (long) preGraph->preNodeCount);
|
|
922
|
+
|
|
923
|
+
for (preNodeIndex = 1; preNodeIndex <= preNodes; preNodeIndex++) {
|
|
924
|
+
currentPreNode = &(preGraph->preNodes[preNodeIndex]);
|
|
925
|
+
|
|
926
|
+
if (currentPreNode->descriptor == NULL)
|
|
927
|
+
counter++;
|
|
928
|
+
else if (counter != 0) {
|
|
929
|
+
newIndex = preNodeIndex - counter;
|
|
930
|
+
destinationPreNode =
|
|
931
|
+
&(preGraph->preNodes[newIndex]);
|
|
932
|
+
|
|
933
|
+
destinationPreNode->preArcLeft =
|
|
934
|
+
currentPreNode->preArcLeft;
|
|
935
|
+
destinationPreNode->preArcRight =
|
|
936
|
+
currentPreNode->preArcRight;
|
|
937
|
+
destinationPreNode->descriptor =
|
|
938
|
+
currentPreNode->descriptor;
|
|
939
|
+
destinationPreNode->length =
|
|
940
|
+
currentPreNode->length;
|
|
941
|
+
|
|
942
|
+
for (preArc = getPreArc_pg(newIndex, preGraph);
|
|
943
|
+
preArc != NULL_IDX;
|
|
944
|
+
preArc = getNextPreArc_pg(preArc, newIndex))
|
|
945
|
+
updatePreArcData_pg(preArc, preNodeIndex,
|
|
946
|
+
newIndex);
|
|
947
|
+
for (preArc = getPreArc_pg(-newIndex, preGraph);
|
|
948
|
+
preArc != NULL_IDX;
|
|
949
|
+
preArc = getNextPreArc_pg(preArc, -newIndex))
|
|
950
|
+
updatePreArcData_pg(preArc, -preNodeIndex,
|
|
951
|
+
-newIndex);
|
|
952
|
+
|
|
953
|
+
if (preGraph->nodeReferenceMarkers) {
|
|
954
|
+
preGraph->nodeReferenceMarkerCounts[newIndex] = preGraph->nodeReferenceMarkerCounts[preNodeIndex];
|
|
955
|
+
preGraph->nodeReferenceMarkers[newIndex] = preGraph->nodeReferenceMarkers[preNodeIndex];
|
|
956
|
+
|
|
957
|
+
for (preMarkerIndex = 0; preMarkerIndex < preGraph->nodeReferenceMarkerCounts[newIndex]; preMarkerIndex++) {
|
|
958
|
+
preMarker = &(preGraph->nodeReferenceMarkers[newIndex][preMarkerIndex]);
|
|
959
|
+
if (preMarker->preNodeID == preNodeIndex)
|
|
960
|
+
preMarker->preNodeID = newIndex;
|
|
961
|
+
else if (preMarker->preNodeID == -preNodeIndex)
|
|
962
|
+
preMarker->preNodeID = -newIndex;
|
|
963
|
+
else
|
|
964
|
+
abort();
|
|
965
|
+
}
|
|
966
|
+
}
|
|
967
|
+
}
|
|
968
|
+
}
|
|
969
|
+
|
|
970
|
+
preGraph->preNodeCount -= counter;
|
|
971
|
+
preGraph->preNodes = reallocOrExit(preGraph->preNodes,
|
|
972
|
+
preGraph->preNodeCount +
|
|
973
|
+
1, PreNode);
|
|
974
|
+
|
|
975
|
+
velvetLog("Destroyed %li preNodes\n", (long) counter);
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
// Allocate memory for an empty preGraph created with sequenceCount different sequences
|
|
979
|
+
PreGraph *emptyPreGraph_pg(IDnum sequenceCount, IDnum referenceCount, int wordLength, boolean double_strand)
|
|
980
|
+
{
|
|
981
|
+
PreGraph *newPreGraph = mallocOrExit(1, PreGraph);
|
|
982
|
+
newPreGraph->sequenceCount = sequenceCount;
|
|
983
|
+
newPreGraph->wordLength = wordLength;
|
|
984
|
+
newPreGraph->preNodeCount = 0;
|
|
985
|
+
newPreGraph->double_strand = double_strand;
|
|
986
|
+
newPreGraph->referenceCount = referenceCount;
|
|
987
|
+
newPreGraph->preNodes = NULL;
|
|
988
|
+
newPreGraph->nodeReferenceMarkerCounts = NULL;
|
|
989
|
+
newPreGraph->nodeReferenceMarkers = NULL;
|
|
990
|
+
|
|
991
|
+
#ifdef _OPENMP
|
|
992
|
+
preArcMemory = newAllocArrayArray(omp_get_max_threads(), sizeof(PreArc), "PreArc");
|
|
993
|
+
#endif
|
|
994
|
+
|
|
995
|
+
return newPreGraph;
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
static Descriptor *newDescriptor_pg(Coordinate length, SequencesReader *seqReadInfo,
|
|
999
|
+
Kmer * initialKmer, int wordLength)
|
|
1000
|
+
{
|
|
1001
|
+
char letter;
|
|
1002
|
+
Nucleotide nucleotide;
|
|
1003
|
+
Coordinate totalLength = length + wordLength - 1;
|
|
1004
|
+
size_t arrayLength = totalLength / 4;
|
|
1005
|
+
Descriptor *res;
|
|
1006
|
+
Coordinate index;
|
|
1007
|
+
Kmer kmerCopy;
|
|
1008
|
+
|
|
1009
|
+
if (totalLength % 4 > 0)
|
|
1010
|
+
arrayLength++;
|
|
1011
|
+
|
|
1012
|
+
res = callocOrExit(arrayLength, Descriptor);
|
|
1013
|
+
|
|
1014
|
+
copyKmers(&kmerCopy, initialKmer);
|
|
1015
|
+
for (index = wordLength - 2; index >= 0; index--)
|
|
1016
|
+
writeNucleotideInDescriptor_pg(popNucleotide(&kmerCopy), res,
|
|
1017
|
+
index);
|
|
1018
|
+
|
|
1019
|
+
for (index = wordLength - 1; index < totalLength; index++) {
|
|
1020
|
+
if (seqReadInfo->m_bIsBinary) {
|
|
1021
|
+
letter = **seqReadInfo->m_ppCurrString;
|
|
1022
|
+
*seqReadInfo->m_ppCurrString += 1; // increment the pointer
|
|
1023
|
+
} else {
|
|
1024
|
+
letter = getc(seqReadInfo->m_pFile);
|
|
1025
|
+
while (!isalpha(letter))
|
|
1026
|
+
letter = getc(seqReadInfo->m_pFile);
|
|
1027
|
+
}
|
|
1028
|
+
//velvetLog("%c", letter);
|
|
1029
|
+
switch (letter) {
|
|
1030
|
+
case 'N':
|
|
1031
|
+
case 'A':
|
|
1032
|
+
nucleotide = ADENINE;
|
|
1033
|
+
break;
|
|
1034
|
+
case 'C':
|
|
1035
|
+
nucleotide = CYTOSINE;
|
|
1036
|
+
break;
|
|
1037
|
+
case 'G':
|
|
1038
|
+
nucleotide = GUANINE;
|
|
1039
|
+
break;
|
|
1040
|
+
case 'T':
|
|
1041
|
+
nucleotide = THYMINE;
|
|
1042
|
+
break;
|
|
1043
|
+
default:
|
|
1044
|
+
fflush(stdout);
|
|
1045
|
+
abort();
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
writeNucleotideInDescriptor_pg(nucleotide, res, index);
|
|
1049
|
+
pushNucleotide(initialKmer, nucleotide);
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
//velvetLog(" ");
|
|
1053
|
+
|
|
1054
|
+
return res;
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
// Allocates the preNodes array with preNodeCount + 1 entries (IDs are used
// as indices starting at 1 elsewhere in this file) and records the count.
void allocatePreNodeSpace_pg(PreGraph * preGraph, IDnum preNodeCount)
{
	PreNode *nodes = callocOrExit(preNodeCount + 1, PreNode);

	preGraph->preNodes = nodes;
	preGraph->preNodeCount = preNodeCount;
}
|
|
1062
|
+
|
|
1063
|
+
// Allocates the two per-preNode reference-marker tables (marker counts and
// marker array pointers), each with preNodeCount + 1 entries.
void allocatePreMarkerCountSpace_pg(PreGraph * preGraph)
{
	IDnum *counts = callocOrExit(preGraph->preNodeCount + 1, IDnum);
	PreMarker **arrays = callocOrExit(preGraph->preNodeCount + 1, PreMarker *);

	preGraph->nodeReferenceMarkerCounts = counts;
	preGraph->nodeReferenceMarkers = arrays;
}
|
|
1068
|
+
|
|
1069
|
+
// Adds one to the reference-marker count of the preNode designated by
// preNodeID; the sign of the ID is ignored.
void incrementNodeReferenceMarkerCount_pg(PreGraph * preGraph, IDnum preNodeID) {
	IDnum slot = (preNodeID < 0) ? -preNodeID : preNodeID;

	preGraph->nodeReferenceMarkerCounts[slot] += 1;
}
|
|
1075
|
+
|
|
1076
|
+
// For every preNode (IDs 1..preNodeCount), allocates a marker array sized by
// the count accumulated so far (or stores NULL when that count is zero), then
// resets the count to 0. Does nothing when the marker tables were never
// allocated.
void allocatePreMarkerSpace_pg(PreGraph * preGraph) {
	IDnum nodeID;
	IDnum expected;

	if (preGraph->nodeReferenceMarkers == NULL)
		return;

	nodeID = 1;
	while (nodeID <= preGraph->preNodeCount) {
		expected = preGraph->nodeReferenceMarkerCounts[nodeID];

		if (expected == 0)
			preGraph->nodeReferenceMarkers[nodeID] = NULL;
		else
			preGraph->nodeReferenceMarkers[nodeID] = callocOrExit(preGraph->nodeReferenceMarkerCounts[nodeID], PreMarker);

		preGraph->nodeReferenceMarkerCounts[nodeID] = 0;
		nodeID++;
	}
}
|
|
1090
|
+
|
|
1091
|
+
// Fills the next free marker slot of preNode `nodeID` and chains it after
// `previous` (which may be NULL). The marker covers the whole preNode: its
// length is the preNode length and its preNodeStart is 0. *start is recorded
// as the reference start and then advanced by the marker length.
// Aborts on a negative nodeID. Returns the marker that was filled in.
PreMarker * addPreMarker_pg(PreGraph * preGraph, IDnum nodeID, IDnum seqID, Coordinate * start, PreMarker * previous) {
	PreMarker *marker;
	IDnum slot;

	if (nodeID < 0)
		abort();

	// Claim the next unused slot of this preNode's marker array
	slot = preGraph->nodeReferenceMarkerCounts[nodeID];
	preGraph->nodeReferenceMarkerCounts[nodeID] = slot + 1;
	marker = &(preGraph->nodeReferenceMarkers[nodeID][slot]);

	// Link into the doubly linked chain, at the tail
	marker->previous = previous;
	if (previous != NULL)
		previous->next = marker;
	marker->next = NULL;

	marker->referenceStart = *start;
	marker->length = preGraph->preNodes[nodeID].length;
	marker->preNodeStart = 0;
	marker->preNodeID = nodeID;
	marker->referenceID = seqID;

	*start += marker->length;

	return marker;
}
|
|
1117
|
+
void addPreNodeToPreGraph_pg(PreGraph * preGraph, Coordinate start,
|
|
1118
|
+
Coordinate finish, SequencesReader *seqReadInfo,
|
|
1119
|
+
Kmer * initialKmer, IDnum ID)
|
|
1120
|
+
{
|
|
1121
|
+
PreNode *newnd = &(preGraph->preNodes[ID]);
|
|
1122
|
+
|
|
1123
|
+
newnd->preArcLeft = NULL_IDX;
|
|
1124
|
+
newnd->preArcRight = NULL_IDX;
|
|
1125
|
+
|
|
1126
|
+
newnd->length = finish - start;
|
|
1127
|
+
|
|
1128
|
+
newnd->descriptor =
|
|
1129
|
+
newDescriptor_pg(newnd->length, seqReadInfo, initialKmer,
|
|
1130
|
+
preGraph->wordLength);
|
|
1131
|
+
}
|
|
1132
|
+
|
|
1133
|
+
// Writes one preNode to outfile: a "NODE\t<ID>\t<length>" line followed by a
// line holding its length + wordLength - 1 nucleotides as letters. A preNode
// of length 0 gets an empty sequence line. A NULL preNode writes nothing.
static void exportPreNode_pg(FILE * outfile, PreNode * preNode, IDnum ID,
			     int wordLength)
{
	Coordinate position;
	Nucleotide code;

	if (!preNode)
		return;

	velvetFprintf(outfile, "NODE\t%ld\t%lld\n", (long) ID, (long long) preNode->length);

	if (preNode->length != 0) {
		position = 0;
		while (position < preNode->length + wordLength - 1) {
			code = getNucleotideInDescriptor_pg(preNode->descriptor,
							    position);
			if (code == ADENINE)
				velvetFprintf(outfile, "A");
			else if (code == CYTOSINE)
				velvetFprintf(outfile, "C");
			else if (code == GUANINE)
				velvetFprintf(outfile, "G");
			else if (code == THYMINE)
				velvetFprintf(outfile, "T");
			position++;
		}
	}

	velvetFprintf(outfile, "\n");
}
|
|
1171
|
+
|
|
1172
|
+
// Writes one marker as a tab-separated line:
// preNodeID, preNodeStart, referenceStart, length.
static void exportPreMarker(FILE * outfile, PreMarker* preMarker) {
	long nodeID = (long) preMarker->preNodeID;
	long long nodeStart = (long long) preMarker->preNodeStart;
	long long refStart = (long long) preMarker->referenceStart;
	long long span = (long long) preMarker->length;

	velvetFprintf(outfile, "%li\t%lli\t%lli\t%lli\n", nodeID, nodeStart, refStart, span);
}
|
|
1175
|
+
|
|
1176
|
+
// Writes a "SEQ\t<refIndex>" line, then scans every marker of every preNode
// for chain heads (no `previous`) belonging to reference refIndex and exports
// each such chain from head to tail by following `next`.
static void exportPreReference_pg(FILE * outfile, IDnum refIndex, PreGraph * preGraph) {
	PreMarker *head;
	PreMarker *cursor;
	IDnum nodeID;
	IDnum slot;

	velvetFprintf(outfile, "SEQ\t%li\n", (long) refIndex);

	for (nodeID = 1; nodeID <= preGraph->preNodeCount; nodeID++) {
		for (slot = 0; slot < preGraph->nodeReferenceMarkerCounts[nodeID]; slot++) {
			head = &(preGraph->nodeReferenceMarkers[nodeID][slot]);

			// Only start from the first marker of a chain of this reference
			if (head->referenceID != refIndex || head->previous)
				continue;

			cursor = head;
			while (cursor) {
				exportPreMarker(outfile, cursor);
				cursor = cursor->next;
			}
		}
	}
}
|
|
1193
|
+
|
|
1194
|
+
// Writes the preGraph to `filename`: a header line (preNode count, sequence
// count, word length, double-strand flag), then every preNode, then the
// marker chains of every reference sequence.
// Does nothing when preGraph is NULL; logs and returns when the file cannot
// be opened for writing.
void exportPreGraph_pg(char *filename, PreGraph * preGraph)
{
	IDnum index;
	FILE *outfile;
	PreNode *preNode;
	int wordLength;

	// The guard must come before any access to the graph: reading the word
	// length first would dereference a NULL pointer
	if (preGraph == NULL) {
		return;
	}

	wordLength = getWordLength_pg(preGraph);

	outfile = fopen(filename, "w");
	if (outfile == NULL) {
		velvetLog("Couldn't open file, sorry\n");
		return;
	} else
		velvetLog("Writing into pregraph file %s...\n", filename);

	// General data
	velvetFprintf(outfile, "%ld\t%ld\t%i\t%hi\n", (long) preGraph->preNodeCount,
		      (long) preGraph->sequenceCount, preGraph->wordLength, (short) preGraph->double_strand);

	// PreNode info
	for (index = 1; index <= preGraph->preNodeCount; index++) {
		preNode = getPreNodeInPreGraph_pg(preGraph, index);
		exportPreNode_pg(outfile, preNode, index, wordLength);
	}

	// Reference sequence info
	for (index = 1; index <= preGraph->referenceCount; index++)
		exportPreReference_pg(outfile, index, preGraph);

	// fclose flushes buffered output, so a failure here means lost data
	if (fclose(outfile) != 0)
		velvetLog("Error while closing pregraph file %s\n", filename);
}
|
|
1229
|
+
|
|
1230
|
+
// Returns the word length stored in the preGraph.
int getWordLength_pg(PreGraph * preGraph)
{
	int length = preGraph->wordLength;

	return length;
}
|
|
1234
|
+
|
|
1235
|
+
// Tells whether the signed preNode `preNodeID` has exactly one preArc on the
// side selected by its sign: the right-hand list for a positive ID, the
// left-hand list otherwise.
boolean hasSinglePreArc_pg(IDnum preNodeID, PreGraph * preGraph)
{
	IDnum slot = (preNodeID < 0) ? -preNodeID : preNodeID;
	PreNode *node = &(preGraph->preNodes[slot]);
	PreArcI firstArc = (preNodeID > 0) ? node->preArcRight : node->preArcLeft;

	// No arc at all on that side
	if (firstArc == NULL_IDX)
		return 0;

	// Exactly one arc means the first one has no successor
	return getNextPreArc_pg(firstArc, preNodeID) == NULL_IDX;
}
|
|
1254
|
+
|
|
1255
|
+
// Counts the preArcs of the signed preNode `preNodeID` on the side selected
// by its sign (right-hand list for a positive ID, left-hand list otherwise).
// The tally is kept and returned in a char.
char simplePreArcCount_pg(IDnum preNodeID, PreGraph * preGraph)
{
	IDnum slot = (preNodeID < 0) ? -preNodeID : preNodeID;
	PreNode *node = &(preGraph->preNodes[slot]);
	PreArcI arc = (preNodeID > 0) ? node->preArcRight : node->preArcLeft;
	char tally = 0;

	while (arc != NULL_IDX) {
		tally++;
		arc = getNextPreArc_pg(arc, preNodeID);
	}

	return tally;
}
|
|
1278
|
+
|
|
1279
|
+
// Tells whether both endpoints of the preArc designate the same preNode,
// either with the same sign or with opposite signs.
boolean isLoop_pg(PreArcI preArc)
{
	PreArc *arc = PREARC_FI2P (preArc);

	if (arc->preNodeIDLeft == arc->preNodeIDRight)
		return 1;

	return arc->preNodeIDLeft == -arc->preNodeIDRight;
}
|
|
1286
|
+
|
|
1287
|
+
// Replaces the descriptor and length of the preNode designated by preNodeID
// (sign ignored). The previous descriptor is freed; the preNode then points
// at `descr`.
void setPreNodeDescriptor_pg(Descriptor * descr, Coordinate length, IDnum preNodeID, PreGraph * preGraph) {
	IDnum slot = (preNodeID < 0) ? -preNodeID : preNodeID;
	PreNode *node = getPreNodeInPreGraph_pg(preGraph, slot);

	free(node->descriptor);
	node->descriptor = descr;
	node->length = length;
}
|
|
1298
|
+
|
|
1299
|
+
// Appends the nucleotides of preNode `preNodeID`, in stored order, to the
// packed output designated by *writePtr / *writeOffset, both of which are
// advanced. When `initial` is set the whole descriptor is copied (length +
// wordLength - 1 nucleotides); otherwise the first wordLength - 1
// nucleotides are skipped.
static void appendPositiveDescriptor_pg(Descriptor ** writePtr, int * writeOffset, IDnum preNodeID, PreGraph * preGraph, boolean initial) {
	PreNode *node = getPreNodeInPreGraph_pg(preGraph, preNodeID);
	int wordLength = getWordLength_pg(preGraph);
	Coordinate end = node->length + wordLength - 1;
	int skipped = initial ? 0 : wordLength - 1;
	Coordinate position = skipped;
	Descriptor *in = &(node->descriptor[skipped / 4]);
	int inSlot = skipped % 4;
	Descriptor window = *in;

	// Drop the codes that precede the starting nucleotide in its element
	window >>= (inSlot * 2);

	while (position < end) {
		(**writePtr) >>= 2;
		if (inSlot == 0)
			window = *in;	// entering a new input element
		(**writePtr) += (window & 3) << 6;
		window >>= 2;

		*writeOffset += 1;
		if (*writeOffset == 4) {
			(*writePtr)++;
			*writeOffset = 0;
		}

		inSlot += 1;
		if (inSlot == 4) {
			in++;
			inSlot = 0;
		}

		position++;
	}
}
|
|
1339
|
+
|
|
1340
|
+
// Appends nucleotides of preNode `preNodeID` read backwards to the packed
// output designated by *writePtr / *writeOffset, both of which are advanced.
// Each 2-bit code is complemented (3 - code) unless COLOR is defined.
// The number of nucleotides read is the preNode length, plus wordLength - 1
// when `initial` is set; reading starts at position (that number - 1).
static void appendNegativeDescriptor_pg(Descriptor ** writePtr, int * writeOffset, IDnum preNodeID, PreGraph * preGraph, boolean initial) {
	PreNode *node = getPreNodeInPreGraph_pg(preGraph, preNodeID);
	int wordLength = getWordLength_pg(preGraph);
	Coordinate span = node->length;
	Coordinate done;
	Descriptor *in;
	Descriptor window;
	int inSlot;

	if (initial)
		span += wordLength - 1;

	// Start on the last nucleotide to read, left-aligned in the top bits
	in = &(node->descriptor[(span - 1) / 4]);
	inSlot = (span - 1) % 4;
	window = *in;
	window <<= ((3 - inSlot) * 2);

	done = 0;
	while (done < span) {
		(**writePtr) >>= 2;
		if (inSlot == 3)
			window = *in;	// just stepped into a new input element
#ifndef COLOR
		(**writePtr) += 192 - (window & 192);
#else
		(**writePtr) += (window & 192);
#endif
		window <<= 2;

		if (++(*writeOffset) == 4) {
			(*writePtr)++;
			*writeOffset = 0;
		}

		inSlot--;
		if (inSlot == -1) {
			in--;
			inSlot = 3;
		}

		done++;
	}
}
|
|
1381
|
+
|
|
1382
|
+
// Appends the sequence of a signed preNode to the output cursor: a positive
// ID goes through appendPositiveDescriptor_pg, any other ID through
// appendNegativeDescriptor_pg with the ID negated.
void appendDescriptors_pg(Descriptor ** start, int * writeOffset, IDnum preNodeID, PreGraph* preGraph, boolean initial) {
	if (preNodeID > 0) {
		appendPositiveDescriptor_pg(start, writeOffset, preNodeID, preGraph, initial);
		return;
	}

	appendNegativeDescriptor_pg(start, writeOffset, -preNodeID, preGraph, initial);
}
|
|
1388
|
+
|
|
1389
|
+
// Tells whether the per-preNode reference-marker table has been allocated.
boolean referenceMarkersAreActivated_pg(PreGraph * preGraph) {
	return !(preGraph->nodeReferenceMarkers == NULL);
}
|
|
1392
|
+
|
|
1393
|
+
// Moves marker `source` into `dest`: copies its fields, assigns it to
// preNode preNodeAID while keeping the sign of the source's preNodeID,
// re-points the chain neighbours at `dest`, and finally blanks `source`
// (IDs set to 0, links set to NULL). The preGraph argument is not used.
static void copyPreMarker(PreMarker * dest, PreMarker * source, IDnum preNodeAID, PreGraph * preGraph) {
	// Chain links first, then the payload
	dest->previous = source->previous;
	dest->next = source->next;

	dest->referenceStart = source->referenceStart;
	dest->referenceID = source->referenceID;
	dest->length = source->length;
	dest->preNodeStart = source->preNodeStart;

	// Same orientation as the source, new preNode
	dest->preNodeID = (source->preNodeID > 0) ? preNodeAID : -preNodeAID;

	// Splice dest into the chain in place of source
	if (source->previous != NULL)
		source->previous->next = dest;
	if (source->next != NULL)
		source->next->previous = dest;

	// Retire the source marker
	source->referenceID = 0;
	source->preNodeID = 0;
	source->previous = NULL;
	source->next = NULL;
}
|
|
1417
|
+
|
|
1418
|
+
// Allocates a new marker array of `length` entries for preNode preNodeID,
// moves the existing markers into it with copyPreMarker (so chain neighbours
// follow the move), frees the old array and returns the new one. The caller
// is responsible for storing the returned pointer.
static PreMarker * reallocOrExitReferenceMarkers(PreGraph * preGraph, IDnum preNodeID, IDnum length) {
	PreMarker *fresh = callocOrExit(length, PreMarker);
	PreMarker *old = preGraph->nodeReferenceMarkers[preNodeID];
	IDnum existing = preGraph->nodeReferenceMarkerCounts[preNodeID];
	IDnum slot = 0;

	while (slot < existing) {
		copyPreMarker(&fresh[slot], &old[slot], preNodeID, preGraph);
		slot++;
	}

	free(preGraph->nodeReferenceMarkers[preNodeID]);

	return fresh;
}
|
|
1434
|
+
|
|
1435
|
+
// Merges the reference markers of preNode B into those of preNode A.
//
// Pass 1: each marker of A whose chain successor (`next` for a positive
// marker, `previous` otherwise) lies on B with the same sign absorbs that
// successor: lengths are added, the chain is re-linked around it, and the
// absorbed marker is blanked (IDs 0, links NULL).
// Pass 2: the markers of B still carrying a referenceID are moved to the end
// of A's array (grown as needed) with preNodeStart shifted by
// length(A) + totalOffset.
static void concatenateReferenceMarkers_H2T_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
	IDnum slot;
	IDnum usedA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
	IDnum usedB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
	Coordinate shift = preGraph->preNodes[preNodeAID].length + totalOffset;
	PreMarker *current;
	PreMarker *follower;
	PreMarker *moved;
	IDnum survivors = 0;

	for (slot = 0; slot < usedA; slot++) {
		current = &(preGraph->nodeReferenceMarkers[preNodeAID][slot]);
		follower = (current->preNodeID > 0) ? current->next : current->previous;

		// Keep only markers that continue onto B in the same orientation
		if (!follower
		    || (current->preNodeID == preNodeAID && follower->preNodeID != preNodeBID)
		    || (current->preNodeID == -preNodeAID && follower->preNodeID != -preNodeBID))
			continue;

		follower->referenceID = 0;
		follower->preNodeID = 0;

		current->length += follower->length;
		if (current->preNodeID > 0) {
			current->next = follower->next;
			if (follower->next)
				follower->next->previous = current;
		} else {
			current->previous = follower->previous;
			if (follower->previous)
				follower->previous->next = current;
			current->referenceStart = follower->referenceStart;
		}
		follower->next = NULL;
		follower->previous = NULL;
	}

	// How many markers of B were not absorbed?
	slot = 0;
	while (slot < usedB) {
		if (preGraph->nodeReferenceMarkers[preNodeBID][slot].referenceID)
			survivors++;
		slot++;
	}

	if (survivors == 0)
		return;

	if (usedA)
		preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, usedA + survivors);
	else
		preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(survivors, PreMarker);

	for (slot = 0; slot < usedB; slot++) {
		moved = &(preGraph->nodeReferenceMarkers[preNodeBID][slot]);
		if (!moved->referenceID)
			continue;

		current = &(preGraph->nodeReferenceMarkers[preNodeAID][usedA++]);
		copyPreMarker(current, moved, preNodeAID, preGraph);
		current->preNodeStart += shift;
	}

	preGraph->nodeReferenceMarkerCounts[preNodeAID] = usedA;
}
|
|
1500
|
+
|
|
1501
|
+
// Merges the reference markers of preNode B into those of preNode A when B
// is attached in the opposite orientation.
//
// Pass 1: each marker of A whose chain successor (`next` for a positive
// marker, `previous` otherwise) lies on B with the opposite sign absorbs
// that successor: lengths are added, the chain is re-linked around it, and
// the absorbed marker is blanked (IDs 0, links NULL).
// Pass 2: the markers of B still carrying a referenceID are moved to the end
// of A's array (grown as needed), their orientation is flipped and their
// preNodeStart becomes
// length(A) + totalOffset + length(B) - preNodeStart - length.
static void concatenateReferenceMarkers_H2H_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
	IDnum slot;
	IDnum usedA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
	IDnum usedB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
	Coordinate spanA = preGraph->preNodes[preNodeAID].length + totalOffset;
	Coordinate spanB = preGraph->preNodes[preNodeBID].length;
	PreMarker *current;
	PreMarker *follower;
	PreMarker *moved;
	IDnum survivors = 0;

	for (slot = 0; slot < usedA; slot++) {
		current = &(preGraph->nodeReferenceMarkers[preNodeAID][slot]);
		follower = (current->preNodeID > 0) ? current->next : current->previous;

		// Keep only markers that continue onto B in the flipped orientation
		if (!follower)
			continue;
		if (current->preNodeID == preNodeAID && follower->preNodeID != -preNodeBID)
			continue;
		if (current->preNodeID == -preNodeAID && follower->preNodeID != preNodeBID)
			continue;

		follower->referenceID = 0;
		follower->preNodeID = 0;

		current->length += follower->length;
		if (current->preNodeID > 0) {
			current->next = follower->next;
			if (follower->next)
				follower->next->previous = current;
		} else {
			current->previous = follower->previous;
			if (follower->previous)
				follower->previous->next = current;
			current->referenceStart = follower->referenceStart;
		}
		follower->next = NULL;
		follower->previous = NULL;
	}

	// How many markers of B were not absorbed?
	slot = 0;
	while (slot < usedB) {
		if (preGraph->nodeReferenceMarkers[preNodeBID][slot].referenceID)
			survivors++;
		slot++;
	}

	if (survivors == 0)
		return;

	if (usedA)
		preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, usedA + survivors);
	else
		preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(survivors, PreMarker);

	for (slot = 0; slot < usedB; slot++) {
		moved = &(preGraph->nodeReferenceMarkers[preNodeBID][slot]);
		if (!moved->referenceID)
			continue;

		current = &(preGraph->nodeReferenceMarkers[preNodeAID][usedA++]);
		copyPreMarker(current, moved, preNodeAID, preGraph);
		current->preNodeID *= -1;
		current->preNodeStart = spanA + spanB - current->preNodeStart - current->length;
	}

	preGraph->nodeReferenceMarkerCounts[preNodeAID] = usedA;
}
|
|
1566
|
+
|
|
1567
|
+
// Merges the reference markers of preNode B into those of preNode A when B
// is placed in front of A in the opposite orientation. totalOffset is not
// used by this variant.
//
// Pass 1: for each marker of A, the chain neighbour on the leading side is
// looked up (`next` for a negative marker, `previous` otherwise). If it lies
// on B with the opposite sign it is absorbed: lengths are added, preNodeStart
// becomes length(B) - neighbour.preNodeStart - neighbour.length, the chain is
// re-linked and the neighbour is blanked. Every other marker of A simply has
// its preNodeStart shifted by length(B).
// Pass 2: the markers of B still carrying a referenceID are moved to the end
// of A's array (grown as needed), flipped, with preNodeStart set to
// length(B) - preNodeStart - length.
static void concatenateReferenceMarkers_T2T_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
	IDnum slot;
	IDnum usedA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
	IDnum usedB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
	Coordinate spanB = preGraph->preNodes[preNodeBID].length;
	PreMarker *current;
	PreMarker *neighbour;
	PreMarker *moved;
	IDnum survivors = 0;

	for (slot = 0; slot < usedA; slot++) {
		current = &(preGraph->nodeReferenceMarkers[preNodeAID][slot]);
		neighbour = (current->preNodeID < 0) ? current->next : current->previous;

		// Not continuing onto B: only shift past the inserted sequence
		if (!neighbour
		    || (current->preNodeID == preNodeAID && neighbour->preNodeID != -preNodeBID)
		    || (current->preNodeID == -preNodeAID && neighbour->preNodeID != preNodeBID)) {
			current->preNodeStart += spanB;
			continue;
		}

		neighbour->referenceID = 0;
		neighbour->preNodeID = 0;

		current->length += neighbour->length;
		current->preNodeStart = spanB - neighbour->preNodeStart - neighbour->length;
		if (current->preNodeID < 0) {
			current->next = neighbour->next;
			if (neighbour->next)
				neighbour->next->previous = current;
		} else {
			current->previous = neighbour->previous;
			if (neighbour->previous)
				neighbour->previous->next = current;
			current->referenceStart = neighbour->referenceStart;
		}
		neighbour->next = NULL;
		neighbour->previous = NULL;
	}

	// How many markers of B were not absorbed?
	slot = 0;
	while (slot < usedB) {
		if (preGraph->nodeReferenceMarkers[preNodeBID][slot].referenceID)
			survivors++;
		slot++;
	}

	if (survivors == 0)
		return;

	if (usedA)
		preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, usedA + survivors);
	else
		preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(survivors, PreMarker);

	for (slot = 0; slot < usedB; slot++) {
		moved = &(preGraph->nodeReferenceMarkers[preNodeBID][slot]);
		if (!moved->referenceID)
			continue;

		current = &(preGraph->nodeReferenceMarkers[preNodeAID][usedA++]);
		copyPreMarker(current, moved, preNodeAID, preGraph);
		current->preNodeID *= -1;
		current->preNodeStart = spanB - current->preNodeStart - current->length;
	}

	preGraph->nodeReferenceMarkerCounts[preNodeAID] = usedA;
}
|
|
1633
|
+
|
|
1634
|
+
/*
 * Folds the reference markers of preNodeBID into those of preNodeAID
 * (the "T2H" orientation used by concatenateReferenceMarkers_pg).
 *
 * Step 1: every marker already stored on A is examined. Its chain
 *         neighbour is ->next when the marker's preNodeID is negative and
 *         ->previous otherwise. If that neighbour is the matching marker on
 *         B, it is absorbed: lengths are summed, A's marker takes over the
 *         neighbour's preNodeStart, the chain is spliced around the
 *         neighbour, and the neighbour is blanked (referenceID and
 *         preNodeID set to 0, links cleared). Otherwise A's marker is simply
 *         shifted by the length of B.
 * Step 2: the markers of B that still have a non-zero referenceID are
 *         counted; when none remain the function returns.
 * Step 3: A's marker array is grown (or created when A had no markers) and
 *         the remaining B markers are appended with copyPreMarker().
 *
 * preNodeAID / preNodeBID are used directly as array indices.
 * totalOffset is accepted for signature parity but is not read here.
 */
static void concatenateReferenceMarkers_T2H_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
	IDnum countA = preGraph->nodeReferenceMarkerCounts[preNodeAID];
	IDnum countB = preGraph->nodeReferenceMarkerCounts[preNodeBID];
	Coordinate lengthB = preGraph->preNodes[preNodeBID].length;
	IDnum survivors = 0;
	IDnum i;
	PreMarker *current;
	PreMarker *neighbour;
	PreMarker *source;

	// Step 1: absorb adjacent B markers or shift A's own markers
	for (i = 0; i < countA; i++) {
		current = &(preGraph->nodeReferenceMarkers[preNodeAID][i]);
		neighbour = (current->preNodeID < 0) ? current->next : current->previous;

		if (neighbour != NULL
		    && (current->preNodeID != preNodeAID || neighbour->preNodeID == preNodeBID)
		    && (current->preNodeID != -preNodeAID || neighbour->preNodeID == -preNodeBID)) {
			// Blank the neighbour so that it is ignored in steps 2 and 3
			neighbour->referenceID = 0;
			neighbour->preNodeID = 0;

			current->length += neighbour->length;
			current->preNodeStart = neighbour->preNodeStart;

			// Splice the neighbour out of the doubly linked chain
			if (current->preNodeID < 0) {
				current->next = neighbour->next;
				if (neighbour->next) {
					neighbour->next->previous = current;
				}
			} else {
				current->previous = neighbour->previous;
				if (neighbour->previous) {
					neighbour->previous->next = current;
				}
				current->referenceStart = neighbour->referenceStart;
			}

			neighbour->next = NULL;
			neighbour->previous = NULL;
		} else {
			// Nothing to absorb: only shift the marker by B's length
			current->preNodeStart += lengthB;
		}
	}

	// Step 2: how many markers of B are still live?
	for (i = 0; i < countB; i++) {
		if (preGraph->nodeReferenceMarkers[preNodeBID][i].referenceID) {
			survivors++;
		}
	}

	if (survivors == 0) {
		return;
	}

	// Step 3: make room on A, then append the live markers of B
	if (countA) {
		preGraph->nodeReferenceMarkers[preNodeAID] = reallocOrExitReferenceMarkers(preGraph, preNodeAID, countA + survivors);
	} else {
		preGraph->nodeReferenceMarkers[preNodeAID] = callocOrExit(survivors, PreMarker);
	}

	for (i = 0; i < countB; i++) {
		source = &(preGraph->nodeReferenceMarkers[preNodeBID][i]);
		if (!source->referenceID) {
			continue;
		}

		current = &(preGraph->nodeReferenceMarkers[preNodeAID][countA]);
		countA++;
		copyPreMarker(current, source, preNodeAID, preGraph);
	}

	preGraph->nodeReferenceMarkerCounts[preNodeAID] = countA;
}
|
|
1698
|
+
|
|
1699
|
+
/*
 * Entry point for merging the reference markers of two pre-nodes.
 *
 * Returns immediately when referenceMarkersAreActivated_pg() reports that
 * reference markers are not in use. Otherwise the signs of the two IDs
 * select one of the four orientation-specific helpers; an ID that is not
 * strictly positive is negated before being handed over:
 *
 *   A > 0,  B > 0   ->  H2T(A, B)
 *   A > 0,  B <= 0  ->  H2H(A, -B)
 *   A <= 0, B > 0   ->  T2T(-A, B)
 *   A <= 0, B <= 0  ->  T2H(-A, -B)
 *
 * totalOffset is forwarded unchanged.
 */
void concatenateReferenceMarkers_pg(IDnum preNodeAID, IDnum preNodeBID, PreGraph * preGraph, Coordinate totalOffset) {
	if (!referenceMarkersAreActivated_pg(preGraph)) {
		return;
	}

	if (preNodeAID > 0) {
		if (preNodeBID > 0) {
			concatenateReferenceMarkers_H2T_pg(preNodeAID, preNodeBID, preGraph, totalOffset);
		} else {
			concatenateReferenceMarkers_H2H_pg(preNodeAID, -preNodeBID, preGraph, totalOffset);
		}
	} else {
		if (preNodeBID > 0) {
			concatenateReferenceMarkers_T2T_pg(-preNodeAID, preNodeBID, preGraph, totalOffset);
		} else {
			concatenateReferenceMarkers_T2H_pg(-preNodeAID, -preNodeBID, preGraph, totalOffset);
		}
	}
}
|
|
1712
|
+
|
|
1713
|
+
/*
 * Tells whether a pre-node currently has at least one reference marker.
 *
 * nodeID may be given with either sign; its absolute value is used to index
 * preGraph->nodeReferenceMarkerCounts. The result is non-zero when that
 * count is greater than zero.
 */
boolean hasPreMarkers(IDnum nodeID, PreGraph * preGraph) {
	IDnum unsignedID = (nodeID < 0) ? -nodeID : nodeID;

	return preGraph->nodeReferenceMarkerCounts[unsignedID] > 0;
}
|