finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
#ifndef _SHORTREADPAIRS_H_
|
|
22
|
+
#define _SHORTREADPAIRS_H_
|
|
23
|
+
|
|
24
|
+
void exploitShortReadPairs(Graph * graph,
|
|
25
|
+
ReadSet * reads,
|
|
26
|
+
boolean * dubious,
|
|
27
|
+
boolean * shadows,
|
|
28
|
+
boolean force_jumps);
|
|
29
|
+
void handicapNode(Node * node);
|
|
30
|
+
NodeList *getMarkedNodeList();
|
|
31
|
+
|
|
32
|
+
#endif
|
data/ext/src/src/splay.c
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
#include <stdlib.h>
|
|
22
|
+
#include <stdio.h>
|
|
23
|
+
|
|
24
|
+
#ifdef _OPENMP
|
|
25
|
+
#include <omp.h>
|
|
26
|
+
#endif
|
|
27
|
+
|
|
28
|
+
#include "globals.h"
|
|
29
|
+
#include "recycleBin.h"
|
|
30
|
+
#include "kmer.h"
|
|
31
|
+
#include "utility.h"
|
|
32
|
+
|
|
33
|
+
#define CHUNKSIZE 10000
|
|
34
|
+
|
|
35
|
+
static RecycleBin *treeMemory = NULL;
|
|
36
|
+
|
|
37
|
+
struct splayNode_st {
|
|
38
|
+
Kmer kmer;
|
|
39
|
+
Coordinate position;
|
|
40
|
+
struct splayNode_st *left;
|
|
41
|
+
struct splayNode_st *right;
|
|
42
|
+
IDnum seqID;
|
|
43
|
+
} ATTRIBUTE_PACKED;
|
|
44
|
+
|
|
45
|
+
typedef struct splayNode_st SplayNode;
|
|
46
|
+
typedef struct splayNode_st SplayTree;
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
#ifdef _OPENMP
|
|
50
|
+
void initSplayTreeMemory(void)
|
|
51
|
+
{
|
|
52
|
+
int n;
|
|
53
|
+
|
|
54
|
+
n = omp_get_max_threads();
|
|
55
|
+
#pragma omp critical
|
|
56
|
+
if (treeMemory == NULL)
|
|
57
|
+
treeMemory = newRecycleBinArray(n, sizeof(SplayNode), CHUNKSIZE);
|
|
58
|
+
}
|
|
59
|
+
#endif
|
|
60
|
+
|
|
61
|
+
static SplayNode *allocateSplayNode()
|
|
62
|
+
{
|
|
63
|
+
#ifdef _OPENMP
|
|
64
|
+
#ifdef DEBUG
|
|
65
|
+
if (treeMemory == NULL)
|
|
66
|
+
{
|
|
67
|
+
velvetLog("The memory for splay trees seems uninitialised, "
|
|
68
|
+
"this is probably a bug, aborting.\n");
|
|
69
|
+
abort();
|
|
70
|
+
}
|
|
71
|
+
#endif
|
|
72
|
+
return allocatePointer(getRecycleBinInArray(treeMemory,
|
|
73
|
+
omp_get_thread_num()));
|
|
74
|
+
#else
|
|
75
|
+
if (treeMemory == NULL)
|
|
76
|
+
treeMemory = newRecycleBin(sizeof(SplayNode), CHUNKSIZE);
|
|
77
|
+
|
|
78
|
+
return allocatePointer(treeMemory);
|
|
79
|
+
#endif
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
void destroyAllSplayTrees()
|
|
83
|
+
{
|
|
84
|
+
#ifdef _OPENMP
|
|
85
|
+
destroyRecycleBinArray(treeMemory);
|
|
86
|
+
#else
|
|
87
|
+
destroyRecycleBin(treeMemory);
|
|
88
|
+
#endif
|
|
89
|
+
treeMemory = NULL;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/* This function can be called only if K2 has a left child */
|
|
93
|
+
/* Perform a rotate between a node (K2) and its left child */
|
|
94
|
+
/* Update heights, then return new root */
|
|
95
|
+
|
|
96
|
+
static SplayNode *SingleRotateWithLeft(SplayNode * K2)
|
|
97
|
+
{
|
|
98
|
+
SplayNode *K1;
|
|
99
|
+
|
|
100
|
+
K1 = K2->left;
|
|
101
|
+
K2->left = K1->right;
|
|
102
|
+
K1->right = K2;
|
|
103
|
+
|
|
104
|
+
return K1; /* New root */
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/* This function can be called only if K1 has a right child */
|
|
108
|
+
/* Perform a rotate between a node (K1) and its right child */
|
|
109
|
+
/* Update heights, then return new root */
|
|
110
|
+
|
|
111
|
+
static SplayNode *SingleRotateWithRight(SplayNode * K1)
|
|
112
|
+
{
|
|
113
|
+
SplayNode *K2;
|
|
114
|
+
|
|
115
|
+
K2 = K1->right;
|
|
116
|
+
K1->right = K2->left;
|
|
117
|
+
K2->left = K1;
|
|
118
|
+
|
|
119
|
+
return K2; /* New root */
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/* Top-down splay procedure, */
|
|
123
|
+
/* not requiring kmer to be in tree */
|
|
124
|
+
|
|
125
|
+
static SplayTree *Splay(Kmer * kmer, SplayTree * T)
|
|
126
|
+
{
|
|
127
|
+
SplayNode Header;
|
|
128
|
+
SplayNode *LeftTreeMax, *RightTreeMin;
|
|
129
|
+
|
|
130
|
+
if (T == NULL)
|
|
131
|
+
return NULL;
|
|
132
|
+
|
|
133
|
+
Header.left = Header.right = NULL;
|
|
134
|
+
LeftTreeMax = RightTreeMin = &Header;
|
|
135
|
+
|
|
136
|
+
while (compareKmers(kmer, &(T->kmer))) {
|
|
137
|
+
if (compareKmers(kmer, &(T->kmer)) < 0) {
|
|
138
|
+
if (T->left == NULL)
|
|
139
|
+
break;
|
|
140
|
+
if (compareKmers(kmer, &(T->left->kmer)) < 0)
|
|
141
|
+
T = SingleRotateWithLeft(T);
|
|
142
|
+
if (T->left == NULL)
|
|
143
|
+
break;
|
|
144
|
+
/* Link right */
|
|
145
|
+
RightTreeMin->left = T;
|
|
146
|
+
RightTreeMin = T;
|
|
147
|
+
T = T->left;
|
|
148
|
+
} else {
|
|
149
|
+
if (T->right == NULL)
|
|
150
|
+
break;
|
|
151
|
+
if (compareKmers(kmer, &(T->right->kmer)) > 0)
|
|
152
|
+
T = SingleRotateWithRight(T);
|
|
153
|
+
if (T->right == NULL)
|
|
154
|
+
break;
|
|
155
|
+
/* Link left */
|
|
156
|
+
LeftTreeMax->right = T;
|
|
157
|
+
LeftTreeMax = T;
|
|
158
|
+
T = T->right;
|
|
159
|
+
}
|
|
160
|
+
} /* while kmer != T->kmer */
|
|
161
|
+
|
|
162
|
+
/* Reassemble */
|
|
163
|
+
LeftTreeMax->right = T->left;
|
|
164
|
+
RightTreeMin->left = T->right;
|
|
165
|
+
T->left = Header.right;
|
|
166
|
+
T->right = Header.left;
|
|
167
|
+
|
|
168
|
+
return T;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
Kmer * findInTree(Kmer * X, SplayTree ** T)
|
|
172
|
+
{
|
|
173
|
+
*T = Splay(X, *T);
|
|
174
|
+
return &((*T)->kmer);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
void insertIntoTree(Kmer * kmer, SplayTree ** T)
|
|
178
|
+
{
|
|
179
|
+
SplayNode *newNode;
|
|
180
|
+
|
|
181
|
+
if (*T == NULL) {
|
|
182
|
+
newNode = allocateSplayNode();
|
|
183
|
+
copyKmers(&(newNode->kmer), kmer);
|
|
184
|
+
newNode->left = newNode->right = NULL;
|
|
185
|
+
*T = newNode;
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
*T = Splay(kmer, *T);
|
|
190
|
+
if (compareKmers(kmer, &((*T)->kmer)) < 0) {
|
|
191
|
+
newNode = allocateSplayNode();
|
|
192
|
+
copyKmers(&(newNode->kmer), kmer);
|
|
193
|
+
newNode->left = (*T)->left;
|
|
194
|
+
newNode->right = *T;
|
|
195
|
+
(*T)->left = NULL;
|
|
196
|
+
*T = newNode;
|
|
197
|
+
} else if (compareKmers(&((*T)->kmer), kmer) < 0) {
|
|
198
|
+
newNode = allocateSplayNode();
|
|
199
|
+
copyKmers(&(newNode->kmer), kmer);
|
|
200
|
+
newNode->right = (*T)->right;
|
|
201
|
+
newNode->left = *T;
|
|
202
|
+
(*T)->right = NULL;
|
|
203
|
+
*T = newNode;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
boolean
|
|
208
|
+
findOrInsertOccurenceInSplayTree(Kmer * kmer, IDnum * seqID,
|
|
209
|
+
Coordinate * position, SplayTree ** T)
|
|
210
|
+
{
|
|
211
|
+
SplayNode *newNode;
|
|
212
|
+
|
|
213
|
+
if (*T == NULL) {
|
|
214
|
+
newNode = allocateSplayNode();
|
|
215
|
+
copyKmers(&(newNode->kmer), kmer);
|
|
216
|
+
newNode->seqID = *seqID;
|
|
217
|
+
newNode->position = *position;
|
|
218
|
+
|
|
219
|
+
newNode->left = newNode->right = NULL;
|
|
220
|
+
|
|
221
|
+
*T = newNode;
|
|
222
|
+
|
|
223
|
+
return false;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
*T = Splay(kmer, *T);
|
|
227
|
+
if (compareKmers(kmer, &((*T)->kmer)) < 0) {
|
|
228
|
+
newNode = allocateSplayNode();
|
|
229
|
+
copyKmers(&(newNode->kmer), kmer);
|
|
230
|
+
newNode->seqID = *seqID;
|
|
231
|
+
newNode->position = *position;
|
|
232
|
+
|
|
233
|
+
newNode->left = (*T)->left;
|
|
234
|
+
newNode->right = *T;
|
|
235
|
+
(*T)->left = NULL;
|
|
236
|
+
|
|
237
|
+
*T = newNode;
|
|
238
|
+
|
|
239
|
+
return false;
|
|
240
|
+
} else if (compareKmers(kmer, &((*T)->kmer)) > 0) {
|
|
241
|
+
newNode = allocateSplayNode();
|
|
242
|
+
copyKmers(&(newNode->kmer), kmer);
|
|
243
|
+
newNode->seqID = *seqID;
|
|
244
|
+
newNode->position = *position;
|
|
245
|
+
|
|
246
|
+
newNode->right = (*T)->right;
|
|
247
|
+
newNode->left = *T;
|
|
248
|
+
(*T)->right = NULL;
|
|
249
|
+
|
|
250
|
+
*T = newNode;
|
|
251
|
+
|
|
252
|
+
return false;
|
|
253
|
+
} else {
|
|
254
|
+
*seqID = (*T)->seqID;
|
|
255
|
+
*position = (*T)->position;
|
|
256
|
+
|
|
257
|
+
return true;
|
|
258
|
+
}
|
|
259
|
+
}
|
data/ext/src/src/splay.h
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
#ifndef _Splay_H
|
|
22
|
+
#define _Splay_H
|
|
23
|
+
|
|
24
|
+
#include <stdio.h>
|
|
25
|
+
|
|
26
|
+
typedef struct splayNode_st SplayTree;
|
|
27
|
+
|
|
28
|
+
// Deallocates tree memory
|
|
29
|
+
void destroyAllSplayTrees();
|
|
30
|
+
|
|
31
|
+
// Finds occurrence of kmer in the tree
|
|
32
|
+
// If found, returns TRUE, and seqID and coordinate are accordingly modified
|
|
33
|
+
// If not, a new leaf is added to the tree, with the seqID and position data
|
|
34
|
+
boolean findOrInsertOccurenceInSplayTree(Kmer * kmer, IDnum * seqID,
|
|
35
|
+
Coordinate * position,
|
|
36
|
+
SplayTree ** T);
|
|
37
|
+
|
|
38
|
+
#ifdef _OPENMP
|
|
39
|
+
/* Initialises the per-thread RecycleBin array */
|
|
40
|
+
void initSplayTreeMemory(void);
|
|
41
|
+
#endif
|
|
42
|
+
|
|
43
|
+
#endif
|
|
@@ -0,0 +1,1315 @@
|
|
|
1
|
+
/*
|
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
|
3
|
+
|
|
4
|
+
This file is part of Velvet.
|
|
5
|
+
|
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
|
7
|
+
it under the terms of the GNU General Public License as published by
|
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
9
|
+
(at your option) any later version.
|
|
10
|
+
|
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
14
|
+
GNU General Public License for more details.
|
|
15
|
+
|
|
16
|
+
You should have received a copy of the GNU General Public License
|
|
17
|
+
along with Velvet; if not, write to the Free Software
|
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
#include <stdlib.h>
|
|
22
|
+
#include <stdio.h>
|
|
23
|
+
#include <string.h>
|
|
24
|
+
#include <time.h>
|
|
25
|
+
#include <sys/time.h>
|
|
26
|
+
|
|
27
|
+
#ifdef _OPENMP
|
|
28
|
+
#include <omp.h>
|
|
29
|
+
#endif
|
|
30
|
+
|
|
31
|
+
#include "globals.h"
|
|
32
|
+
#include "readSet.h"
|
|
33
|
+
#include "splay.h"
|
|
34
|
+
#include "tightString.h"
|
|
35
|
+
#include "utility.h"
|
|
36
|
+
#include "kmer.h"
|
|
37
|
+
#include "kmerOccurenceTable.h"
|
|
38
|
+
#include "recycleBin.h"
|
|
39
|
+
#include "binarySequences.h"
|
|
40
|
+
|
|
41
|
+
static RecycleBin * maskMemory = NULL;
|
|
42
|
+
|
|
43
|
+
static Mask *allocateMask()
|
|
44
|
+
{
|
|
45
|
+
if (maskMemory == NULL)
|
|
46
|
+
maskMemory = newRecycleBin(sizeof(Mask), 10000);
|
|
47
|
+
|
|
48
|
+
return (Mask *) allocatePointer(maskMemory);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
static Mask * newMask(Coordinate position)
|
|
52
|
+
{
|
|
53
|
+
Mask * mask = allocateMask();
|
|
54
|
+
mask->start = position;
|
|
55
|
+
mask->finish = position;
|
|
56
|
+
mask->next = NULL;
|
|
57
|
+
return mask;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// DEBUG
|
|
61
|
+
boolean debug = false;
|
|
62
|
+
|
|
63
|
+
#define HASH_BUCKETS_NB 16777216
|
|
64
|
+
|
|
65
|
+
#ifdef _OPENMP
|
|
66
|
+
|
|
67
|
+
#define NB_PUSH 32
|
|
68
|
+
#define BUFFER_SIZE 4096
|
|
69
|
+
|
|
70
|
+
static StringBuffer **annotationBuffer = NULL;
|
|
71
|
+
static StringBuffer **annotationBufferW = NULL;
|
|
72
|
+
static int *nbPush = NULL;
|
|
73
|
+
static boolean producing = 1;
|
|
74
|
+
|
|
75
|
+
static void initAnnotationBuffers(void)
|
|
76
|
+
{
|
|
77
|
+
int n;
|
|
78
|
+
int i;
|
|
79
|
+
|
|
80
|
+
n = omp_get_max_threads();
|
|
81
|
+
annotationBuffer = callocOrExit(n, StringBuffer*);
|
|
82
|
+
annotationBufferW = callocOrExit(n, StringBuffer*);
|
|
83
|
+
nbPush = callocOrExit(n, int);
|
|
84
|
+
|
|
85
|
+
for (i = 0; i < n; i++)
|
|
86
|
+
{
|
|
87
|
+
annotationBuffer[i] = newStringBuffer(BUFFER_SIZE);
|
|
88
|
+
annotationBufferW[i] = newStringBuffer(BUFFER_SIZE);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
static void destroyAnnotationBuffers(void)
|
|
93
|
+
{
|
|
94
|
+
int n;
|
|
95
|
+
int i;
|
|
96
|
+
|
|
97
|
+
n = omp_get_max_threads();
|
|
98
|
+
|
|
99
|
+
for (i = 0; i < n; i++)
|
|
100
|
+
{
|
|
101
|
+
destroyStringBuffer(annotationBuffer[i], 1);
|
|
102
|
+
destroyStringBuffer(annotationBufferW[i], 1);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
free(annotationBuffer);
|
|
106
|
+
free(annotationBufferW);
|
|
107
|
+
free(nbPush);
|
|
108
|
+
annotationBuffer = NULL;
|
|
109
|
+
annotationBufferW = NULL;
|
|
110
|
+
nbPush = NULL;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
static void pushBufferCommit(int thread)
|
|
114
|
+
{
|
|
115
|
+
StringBuffer *tmp;
|
|
116
|
+
char *s;
|
|
117
|
+
|
|
118
|
+
s = annotationBufferW[thread]->str;
|
|
119
|
+
do
|
|
120
|
+
{
|
|
121
|
+
#pragma omp flush(s)
|
|
122
|
+
}
|
|
123
|
+
while (*s);
|
|
124
|
+
tmp = annotationBufferW[thread];
|
|
125
|
+
annotationBufferW[thread] = annotationBuffer[thread];
|
|
126
|
+
annotationBuffer[thread] = tmp;
|
|
127
|
+
tmp = annotationBufferW[thread];
|
|
128
|
+
#pragma omp flush(tmp)
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
static void pushBuffer(int thread)
|
|
132
|
+
{
|
|
133
|
+
if (++nbPush[thread] == NB_PUSH)
|
|
134
|
+
{
|
|
135
|
+
nbPush[thread] = 0;
|
|
136
|
+
pushBufferCommit(thread);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
static void writeBuffers(FILE *outFile, int nbThreads)
|
|
141
|
+
{
|
|
142
|
+
int i;
|
|
143
|
+
|
|
144
|
+
for (i = 0; i < nbThreads; i++)
|
|
145
|
+
{
|
|
146
|
+
StringBuffer *b;
|
|
147
|
+
char *s;
|
|
148
|
+
|
|
149
|
+
b = annotationBufferW[i];
|
|
150
|
+
#pragma omp flush(b)
|
|
151
|
+
s = b->str;
|
|
152
|
+
#pragma omp flush(s)
|
|
153
|
+
if (*s)
|
|
154
|
+
{
|
|
155
|
+
velvetFprintf(outFile, "%s", annotationBufferW[i]->str);
|
|
156
|
+
resetStringBuffer(annotationBufferW[i]);
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
static void bufferWritter(FILE *outFile)
|
|
162
|
+
{
|
|
163
|
+
int n;
|
|
164
|
+
|
|
165
|
+
n = omp_get_max_threads();
|
|
166
|
+
#pragma omp flush(producing)
|
|
167
|
+
while (producing)
|
|
168
|
+
{
|
|
169
|
+
writeBuffers(outFile, n);
|
|
170
|
+
#pragma omp flush(producing)
|
|
171
|
+
}
|
|
172
|
+
writeBuffers(outFile, n);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
static void appendLine(char *line, int thread)
|
|
176
|
+
{
|
|
177
|
+
appendStringBuffer(annotationBuffer[thread], line);
|
|
178
|
+
}
|
|
179
|
+
#else
|
|
180
|
+
|
|
181
|
+
#define BUFFER_SIZE 1024
|
|
182
|
+
|
|
183
|
+
StringBuffer *annotationBuffer = NULL;
|
|
184
|
+
|
|
185
|
+
static void appendLine(char *line, int thread)
|
|
186
|
+
{
|
|
187
|
+
appendStringBuffer(annotationBuffer, line);
|
|
188
|
+
}
|
|
189
|
+
#endif
|
|
190
|
+
|
|
191
|
+
struct splayTable_st {
|
|
192
|
+
SplayTree **table;
|
|
193
|
+
#ifdef _OPENMP
|
|
194
|
+
omp_lock_t *tableLocks;
|
|
195
|
+
#endif
|
|
196
|
+
KmerOccurenceTable *kmerOccurenceTable;
|
|
197
|
+
int WORDLENGTH;
|
|
198
|
+
boolean double_strand;
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
SplayTable *newSplayTable(int WORDLENGTH, boolean double_strand)
|
|
202
|
+
{
|
|
203
|
+
SplayTable *splayTable = mallocOrExit(1, SplayTable);
|
|
204
|
+
splayTable->WORDLENGTH = WORDLENGTH;
|
|
205
|
+
splayTable->table = callocOrExit(HASH_BUCKETS_NB, SplayTree *);
|
|
206
|
+
splayTable->kmerOccurenceTable = NULL;
|
|
207
|
+
splayTable->double_strand = double_strand;
|
|
208
|
+
#ifdef _OPENMP
|
|
209
|
+
splayTable->tableLocks = mallocOrExit(HASH_BUCKETS_NB, omp_lock_t);
|
|
210
|
+
int i;
|
|
211
|
+
#pragma omp parallel for
|
|
212
|
+
for (i = 0; i < HASH_BUCKETS_NB; i++)
|
|
213
|
+
omp_init_lock(splayTable->tableLocks + i);
|
|
214
|
+
initSplayTreeMemory();
|
|
215
|
+
#endif
|
|
216
|
+
return splayTable;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
void destroySplayTable(SplayTable * splayTable)
|
|
220
|
+
{
|
|
221
|
+
velvetLog("Destroying splay table\n");
|
|
222
|
+
|
|
223
|
+
destroyAllSplayTrees();
|
|
224
|
+
free(splayTable->table);
|
|
225
|
+
destroyKmerOccurenceTable(splayTable->kmerOccurenceTable);
|
|
226
|
+
free(splayTable);
|
|
227
|
+
|
|
228
|
+
velvetLog("Splay table destroyed\n");
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
static KmerKey hash_kmer(Kmer * kmer)
|
|
232
|
+
{
|
|
233
|
+
#if KMER_LONGLONGS
|
|
234
|
+
KmerKey key = kmer->longlongs[0];
|
|
235
|
+
|
|
236
|
+
#if KMER_LONGLONGS > 1
|
|
237
|
+
key ^= kmer->longlongs[1];
|
|
238
|
+
#endif
|
|
239
|
+
#if KMER_LONGLONGS > 2
|
|
240
|
+
key ^= kmer->longlongs[2];
|
|
241
|
+
#endif
|
|
242
|
+
|
|
243
|
+
key = (~key) + (key << 21);
|
|
244
|
+
key = key ^ (key >> 24);
|
|
245
|
+
key = (key + (key << 3)) + (key << 8);
|
|
246
|
+
key = key ^ (key >> 14);
|
|
247
|
+
key = (key + (key << 2)) + (key << 4);
|
|
248
|
+
key = key ^ (key >> 28);
|
|
249
|
+
key = key + (key << 31);
|
|
250
|
+
|
|
251
|
+
return key % HASH_BUCKETS_NB;
|
|
252
|
+
#elif KMER_LONGS
|
|
253
|
+
KmerKey key = kmer->longs;
|
|
254
|
+
|
|
255
|
+
key += ~(key << 15);
|
|
256
|
+
key ^= (key >> 10);
|
|
257
|
+
key += (key << 3);
|
|
258
|
+
key ^= (key >> 6);
|
|
259
|
+
key += ~(key << 11);
|
|
260
|
+
key ^= (key >> 16);
|
|
261
|
+
|
|
262
|
+
return key % HASH_BUCKETS_NB;
|
|
263
|
+
|
|
264
|
+
#elif KMER_INTS
|
|
265
|
+
return kmer->ints % HASH_BUCKETS_NB;
|
|
266
|
+
#elif KMER_CHARS
|
|
267
|
+
return kmer->chars % HASH_BUCKETS_NB;
|
|
268
|
+
#endif
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
static Coordinate getNearestHSPIndex(Coordinate position, IDnum * sequenceIDs, Coordinate sequenceLength) {
|
|
272
|
+
Coordinate back_offset = -1;
|
|
273
|
+
Coordinate front_offset = -1;
|
|
274
|
+
|
|
275
|
+
for (back_offset = 1; position - back_offset > 0; back_offset++)
|
|
276
|
+
if (sequenceIDs[position - back_offset])
|
|
277
|
+
break;
|
|
278
|
+
|
|
279
|
+
for (front_offset = 1; position + front_offset < sequenceLength; front_offset++)
|
|
280
|
+
if (sequenceIDs[position + front_offset])
|
|
281
|
+
break;
|
|
282
|
+
|
|
283
|
+
if (back_offset == position && position + front_offset == sequenceLength)
|
|
284
|
+
return -1;
|
|
285
|
+
else if (back_offset == position)
|
|
286
|
+
return position + front_offset;
|
|
287
|
+
else if (front_offset + position == sequenceLength)
|
|
288
|
+
return position - back_offset;
|
|
289
|
+
else
|
|
290
|
+
return back_offset < front_offset? position - back_offset : position + front_offset;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
static KmerOccurence * getMostAppropriateHit(Coordinate readCoord, Coordinate readLength, boolean direct, KmerOccurence * kmerOccurence, IDnum mapCount, IDnum * mapSequenceID, Coordinate * mapCoord, int wordLength) {
|
|
294
|
+
KmerOccurence * current;
|
|
295
|
+
KmerOccurence * best = NULL;
|
|
296
|
+
Coordinate expectedPosition;
|
|
297
|
+
Coordinate positionError;
|
|
298
|
+
IDnum mapIndex;
|
|
299
|
+
|
|
300
|
+
// If only one hit
|
|
301
|
+
if (!getNextKmerOccurence(kmerOccurence))
|
|
302
|
+
return kmerOccurence;
|
|
303
|
+
|
|
304
|
+
// If multiple hits by unmapped read
|
|
305
|
+
if (mapCount == 0)
|
|
306
|
+
return NULL;
|
|
307
|
+
|
|
308
|
+
// Compare cases
|
|
309
|
+
for (current = kmerOccurence; current; current = getNextKmerOccurence(current)) {
|
|
310
|
+
for (mapIndex = 0; mapIndex < mapCount; mapIndex++) {
|
|
311
|
+
|
|
312
|
+
// If wrong sequence or unconsistent orientation
|
|
313
|
+
if ((direct && getKmerOccurenceNodeID(current) != mapSequenceID[mapIndex])
|
|
314
|
+
|| (!direct && getKmerOccurenceNodeID(current) != -mapSequenceID[mapIndex]))
|
|
315
|
+
continue;
|
|
316
|
+
|
|
317
|
+
// Compute where it is supposed to land on reference
|
|
318
|
+
if (mapSequenceID[mapIndex] < 0)
|
|
319
|
+
expectedPosition = mapCoord[mapIndex] + readLength - readCoord - 1;
|
|
320
|
+
else
|
|
321
|
+
expectedPosition = mapCoord[mapIndex] + readCoord - wordLength + 1;
|
|
322
|
+
|
|
323
|
+
// Compute positional error
|
|
324
|
+
positionError = getKmerOccurencePosition(current) - expectedPosition;
|
|
325
|
+
|
|
326
|
+
// If potential hit record
|
|
327
|
+
if (positionError < 1 && positionError > -1) {
|
|
328
|
+
if (best)
|
|
329
|
+
// If competing hit, give up
|
|
330
|
+
return NULL;
|
|
331
|
+
else
|
|
332
|
+
// Record current hit
|
|
333
|
+
best = current;
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
return best;
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
static inline boolean
|
|
342
|
+
doFindOrInsertOccurenceInSplayTree(Kmer * kmer, IDnum * seqID,
|
|
343
|
+
Coordinate * position, SplayTable *table)
|
|
344
|
+
{
|
|
345
|
+
#ifdef _OPENMP
|
|
346
|
+
const KmerKey kmerHash = hash_kmer(kmer);
|
|
347
|
+
boolean ret;
|
|
348
|
+
|
|
349
|
+
omp_set_lock(table->tableLocks + kmerHash);
|
|
350
|
+
ret = findOrInsertOccurenceInSplayTree(kmer, seqID, position,
|
|
351
|
+
table->table + kmerHash);
|
|
352
|
+
omp_unset_lock(table->tableLocks + kmerHash);
|
|
353
|
+
|
|
354
|
+
return ret;
|
|
355
|
+
#else
|
|
356
|
+
return findOrInsertOccurenceInSplayTree(kmer, seqID, position,
|
|
357
|
+
&table->table[hash_kmer(kmer)]);
|
|
358
|
+
#endif
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
static boolean findOrInsertOccurenceInSplayTable(Kmer * kmer, IDnum * seqID,
|
|
363
|
+
Coordinate * position,
|
|
364
|
+
SplayTable * table, IDnum * sequenceIDs,
|
|
365
|
+
Coordinate * coords, Coordinate readIndex, Coordinate readLength, boolean direct)
|
|
366
|
+
{
|
|
367
|
+
KmerOccurence * hit;
|
|
368
|
+
Coordinate HSPIndex;
|
|
369
|
+
|
|
370
|
+
// Check if previous anchor
|
|
371
|
+
if (sequenceIDs && sequenceIDs[readIndex]) {
|
|
372
|
+
if (direct)
|
|
373
|
+
*seqID = sequenceIDs[readIndex];
|
|
374
|
+
else
|
|
375
|
+
*seqID = -sequenceIDs[readIndex];
|
|
376
|
+
if (sequenceIDs[readIndex] > 0)
|
|
377
|
+
*position = coords[readIndex] + readIndex;
|
|
378
|
+
else
|
|
379
|
+
*position = coords[readIndex] - readIndex + readLength - 1;
|
|
380
|
+
|
|
381
|
+
return true;
|
|
382
|
+
}
|
|
383
|
+
else if (coords && coords[readIndex])
|
|
384
|
+
// If in buffer zone:
|
|
385
|
+
return doFindOrInsertOccurenceInSplayTree(kmer, seqID, position, table);
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
if (debug)
|
|
389
|
+
abort();
|
|
390
|
+
// Look up first in reference sequence k-mers
|
|
391
|
+
if (table->kmerOccurenceTable
|
|
392
|
+
&& (hit = findKmerInKmerOccurenceTable(kmer, table->kmerOccurenceTable))) {
|
|
393
|
+
if (!getNextKmerOccurence(hit)) {
|
|
394
|
+
*seqID = getKmerOccurenceNodeID(hit);
|
|
395
|
+
*position = getKmerOccurencePosition(hit);
|
|
396
|
+
return true;
|
|
397
|
+
} else if ((HSPIndex = getNearestHSPIndex(*position, sequenceIDs, readLength)) > 0) {
|
|
398
|
+
hit = getMostAppropriateHit(readIndex, readLength, direct, hit, 1, &(sequenceIDs[HSPIndex]), &(coords[HSPIndex]), table->WORDLENGTH);
|
|
399
|
+
if (hit) {
|
|
400
|
+
*seqID = getKmerOccurenceNodeID(hit);
|
|
401
|
+
*position = getKmerOccurencePosition(hit);
|
|
402
|
+
return true;
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// If not, go through the novel k-mers
|
|
409
|
+
return doFindOrInsertOccurenceInSplayTree(kmer, seqID, position, table);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
static void printAnnotations(IDnum *sequenceIDs, Coordinate * coords,
|
|
413
|
+
TightString * array, SplayTable * table,
|
|
414
|
+
FILE * file, boolean second_in_pair, IDnum seqID)
|
|
415
|
+
{
|
|
416
|
+
Coordinate readNucleotideIndex = 0;
|
|
417
|
+
Coordinate writeNucleotideIndex = 0;
|
|
418
|
+
Kmer word;
|
|
419
|
+
Kmer antiWord;
|
|
420
|
+
boolean annotationClosed = true;
|
|
421
|
+
IDnum sequenceID;
|
|
422
|
+
Coordinate coord;
|
|
423
|
+
boolean found;
|
|
424
|
+
Coordinate position = 0;
|
|
425
|
+
Coordinate start = 0;
|
|
426
|
+
Coordinate finish = 0;
|
|
427
|
+
IDnum referenceSequenceID = 0;
|
|
428
|
+
Nucleotide nucleotide;
|
|
429
|
+
char lineBuffer[MAXLINE];
|
|
430
|
+
TightString * tString = getTightStringInArray(array, seqID - 1);
|
|
431
|
+
int thread = 0;
|
|
432
|
+
|
|
433
|
+
clearKmer(&word);
|
|
434
|
+
clearKmer(&antiWord);
|
|
435
|
+
|
|
436
|
+
#ifdef _OPENMP
|
|
437
|
+
thread = omp_get_thread_num();
|
|
438
|
+
#endif
|
|
439
|
+
|
|
440
|
+
if (debug)
|
|
441
|
+
abort();
|
|
442
|
+
|
|
443
|
+
sprintf(lineBuffer, "ROADMAP %li\n", (long)seqID);
|
|
444
|
+
appendLine(lineBuffer, thread);
|
|
445
|
+
|
|
446
|
+
// Neglect any string shorter than WORDLENGTH :
|
|
447
|
+
if (getLength(tString) < table->WORDLENGTH) {
|
|
448
|
+
#ifdef _OPENMP
|
|
449
|
+
pushBuffer(thread);
|
|
450
|
+
#else
|
|
451
|
+
velvetFprintf(file, "%s", annotationBuffer->str);
|
|
452
|
+
resetStringBuffer(annotationBuffer);
|
|
453
|
+
#endif
|
|
454
|
+
return;
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
// Fill in the initial word :
|
|
458
|
+
for (readNucleotideIndex = 0;
|
|
459
|
+
readNucleotideIndex < table->WORDLENGTH - 1;
|
|
460
|
+
readNucleotideIndex++) {
|
|
461
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
462
|
+
pushNucleotide(&word, nucleotide);
|
|
463
|
+
#ifdef COLOR
|
|
464
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
465
|
+
#else
|
|
466
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
467
|
+
#endif
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
while (readNucleotideIndex < getLength(tString)) {
|
|
471
|
+
// Shift word:
|
|
472
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
473
|
+
pushNucleotide(&word, nucleotide);
|
|
474
|
+
#ifdef COLOR
|
|
475
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
476
|
+
#else
|
|
477
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
478
|
+
#endif
|
|
479
|
+
|
|
480
|
+
sequenceID = seqID;
|
|
481
|
+
coord = writeNucleotideIndex;
|
|
482
|
+
|
|
483
|
+
if (table->double_strand) {
|
|
484
|
+
if (compareKmers(&word, &antiWord) <= 0) {
|
|
485
|
+
found =
|
|
486
|
+
findOrInsertOccurenceInSplayTable(&word,
|
|
487
|
+
&sequenceID,
|
|
488
|
+
&coord,
|
|
489
|
+
table,
|
|
490
|
+
sequenceIDs,
|
|
491
|
+
coords,
|
|
492
|
+
readNucleotideIndex,
|
|
493
|
+
getLength(tString),
|
|
494
|
+
true);
|
|
495
|
+
} else {
|
|
496
|
+
sequenceID = -sequenceID;
|
|
497
|
+
found =
|
|
498
|
+
findOrInsertOccurenceInSplayTable(&antiWord,
|
|
499
|
+
&sequenceID,
|
|
500
|
+
&coord,
|
|
501
|
+
table,
|
|
502
|
+
sequenceIDs,
|
|
503
|
+
coords,
|
|
504
|
+
readNucleotideIndex,
|
|
505
|
+
getLength(tString),
|
|
506
|
+
false);
|
|
507
|
+
sequenceID = -sequenceID;
|
|
508
|
+
}
|
|
509
|
+
} else {
|
|
510
|
+
if (!second_in_pair) {
|
|
511
|
+
found =
|
|
512
|
+
findOrInsertOccurenceInSplayTable(&word,
|
|
513
|
+
&sequenceID,
|
|
514
|
+
&coord,
|
|
515
|
+
table,
|
|
516
|
+
sequenceIDs,
|
|
517
|
+
coords,
|
|
518
|
+
readNucleotideIndex,
|
|
519
|
+
getLength(tString),
|
|
520
|
+
true);
|
|
521
|
+
} else {
|
|
522
|
+
sequenceID = -sequenceID;
|
|
523
|
+
found =
|
|
524
|
+
findOrInsertOccurenceInSplayTable(&antiWord,
|
|
525
|
+
&sequenceID,
|
|
526
|
+
&coord,
|
|
527
|
+
table,
|
|
528
|
+
sequenceIDs,
|
|
529
|
+
coords,
|
|
530
|
+
readNucleotideIndex,
|
|
531
|
+
getLength(tString),
|
|
532
|
+
false);
|
|
533
|
+
sequenceID = -sequenceID;
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
if (!found) {
|
|
538
|
+
writeNucleotideIndex++;
|
|
539
|
+
if (!annotationClosed) {
|
|
540
|
+
sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
|
|
541
|
+
(long) referenceSequenceID, (long long) position,
|
|
542
|
+
(long long) start, (long long) finish);
|
|
543
|
+
appendLine(lineBuffer, thread);
|
|
544
|
+
}
|
|
545
|
+
annotationClosed = true;
|
|
546
|
+
}
|
|
547
|
+
// Otherwise create/complete annotation:
|
|
548
|
+
else {
|
|
549
|
+
// Forbidden k-mer
|
|
550
|
+
if (sequenceID == 0) {
|
|
551
|
+
break;
|
|
552
|
+
}
|
|
553
|
+
// Closed/inexistant annotation
|
|
554
|
+
else if (annotationClosed) {
|
|
555
|
+
referenceSequenceID = sequenceID;
|
|
556
|
+
position = writeNucleotideIndex;
|
|
557
|
+
start = finish = coord;
|
|
558
|
+
|
|
559
|
+
if (referenceSequenceID > 0)
|
|
560
|
+
finish++;
|
|
561
|
+
else
|
|
562
|
+
finish--;
|
|
563
|
+
|
|
564
|
+
annotationClosed = false;
|
|
565
|
+
}
|
|
566
|
+
// Open annotation
|
|
567
|
+
else if (sequenceID == referenceSequenceID
|
|
568
|
+
&& coord == finish) {
|
|
569
|
+
if (referenceSequenceID > 0)
|
|
570
|
+
finish++;
|
|
571
|
+
else
|
|
572
|
+
finish--;
|
|
573
|
+
}
|
|
574
|
+
// Previous non corresponding annotation
|
|
575
|
+
else {
|
|
576
|
+
sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
|
|
577
|
+
(long) referenceSequenceID, (long long) position,
|
|
578
|
+
(long long) start, (long long) finish);
|
|
579
|
+
appendLine(lineBuffer, thread);
|
|
580
|
+
|
|
581
|
+
referenceSequenceID = sequenceID;
|
|
582
|
+
position = writeNucleotideIndex;
|
|
583
|
+
start = finish = coord;
|
|
584
|
+
|
|
585
|
+
if (referenceSequenceID > 0)
|
|
586
|
+
finish++;
|
|
587
|
+
else
|
|
588
|
+
finish--;
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
readNucleotideIndex++;
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
if (!annotationClosed) {
|
|
596
|
+
sprintf(lineBuffer, "%ld\t%lld\t%lld\t%lld\n",
|
|
597
|
+
(long) referenceSequenceID, (long long) position,
|
|
598
|
+
(long long) start, (long long) finish);
|
|
599
|
+
appendLine(lineBuffer, thread);
|
|
600
|
+
}
|
|
601
|
+
#ifdef _OPENMP
|
|
602
|
+
pushBuffer(thread);
|
|
603
|
+
#else
|
|
604
|
+
velvetFprintf(file, "%s", annotationBuffer->str);
|
|
605
|
+
resetStringBuffer(annotationBuffer);
|
|
606
|
+
#endif
|
|
607
|
+
|
|
608
|
+
return;
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
static void computeClearHSPs(TightString * array, boolean second_in_pair, SplayTable * table, IDnum ** sequenceIDs, Coordinate ** coords, IDnum * mapReferenceIDs, Coordinate * mapCoords, Coordinate mapCount, IDnum seqID) {
|
|
612
|
+
Coordinate readNucleotideIndex = 0;
|
|
613
|
+
Kmer word;
|
|
614
|
+
Kmer antiWord;
|
|
615
|
+
Kmer polyA;
|
|
616
|
+
Nucleotide nucleotide;
|
|
617
|
+
KmerOccurence * hit;
|
|
618
|
+
|
|
619
|
+
int penalty;
|
|
620
|
+
TightString * tString;
|
|
621
|
+
Coordinate length;
|
|
622
|
+
|
|
623
|
+
clearKmer(&polyA);
|
|
624
|
+
tString = getTightStringInArray(array, seqID - 1);
|
|
625
|
+
length = getLength(tString);
|
|
626
|
+
*sequenceIDs = callocOrExit(length, IDnum);
|
|
627
|
+
*coords = callocOrExit(length, Coordinate);
|
|
628
|
+
|
|
629
|
+
// First pass for unambiguous hits
|
|
630
|
+
// Fill in the initial word :
|
|
631
|
+
clearKmer(&word);
|
|
632
|
+
clearKmer(&antiWord);
|
|
633
|
+
for (readNucleotideIndex = 0;
|
|
634
|
+
readNucleotideIndex < table->WORDLENGTH - 1;
|
|
635
|
+
readNucleotideIndex++) {
|
|
636
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
637
|
+
pushNucleotide(&word, nucleotide);
|
|
638
|
+
#ifdef COLOR
|
|
639
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
640
|
+
#else
|
|
641
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
642
|
+
#endif
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
// Kill silly poly-T beginnings
|
|
646
|
+
while (readNucleotideIndex < getLength(tString) && (compareKmers(&antiWord, &polyA) == 0 || compareKmers(&word, &polyA) == 0)) {
|
|
647
|
+
nucleotide = getNucleotide(readNucleotideIndex++, tString);
|
|
648
|
+
pushNucleotide(&word, nucleotide);
|
|
649
|
+
#ifdef COLOR
|
|
650
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
651
|
+
#else
|
|
652
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
653
|
+
#endif
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
while (readNucleotideIndex < getLength(tString)) {
|
|
657
|
+
// Shift word:
|
|
658
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
659
|
+
pushNucleotide(&word, nucleotide);
|
|
660
|
+
|
|
661
|
+
#ifdef COLOR
|
|
662
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
663
|
+
#else
|
|
664
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
665
|
+
#endif
|
|
666
|
+
|
|
667
|
+
if (table->double_strand) {
|
|
668
|
+
if (compareKmers(&word, &antiWord) <= 0) {
|
|
669
|
+
hit = findKmerInKmerOccurenceTable(&word, table->kmerOccurenceTable);
|
|
670
|
+
|
|
671
|
+
if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), true, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
|
|
672
|
+
(*sequenceIDs)[readNucleotideIndex] = getKmerOccurenceNodeID(hit);
|
|
673
|
+
} else {
|
|
674
|
+
hit = findKmerInKmerOccurenceTable(&antiWord, table->kmerOccurenceTable);
|
|
675
|
+
|
|
676
|
+
if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), false, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
|
|
677
|
+
(*sequenceIDs)[readNucleotideIndex] = -getKmerOccurenceNodeID(hit);
|
|
678
|
+
}
|
|
679
|
+
} else {
|
|
680
|
+
if (!second_in_pair) {
|
|
681
|
+
hit = findKmerInKmerOccurenceTable(&word, table->kmerOccurenceTable);
|
|
682
|
+
|
|
683
|
+
if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), true, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
|
|
684
|
+
(*sequenceIDs)[readNucleotideIndex] = getKmerOccurenceNodeID(hit);
|
|
685
|
+
} else {
|
|
686
|
+
hit = findKmerInKmerOccurenceTable(&antiWord, table->kmerOccurenceTable);
|
|
687
|
+
|
|
688
|
+
if (hit && (hit = getMostAppropriateHit(readNucleotideIndex, getLength(tString), false, hit, mapCount, mapReferenceIDs, mapCoords, table->WORDLENGTH)))
|
|
689
|
+
(*sequenceIDs)[readNucleotideIndex] = -getKmerOccurenceNodeID(hit);
|
|
690
|
+
}
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
if ((*sequenceIDs)[readNucleotideIndex]) {
|
|
694
|
+
if ((*sequenceIDs)[readNucleotideIndex] > 0)
|
|
695
|
+
(*coords)[readNucleotideIndex] = getKmerOccurencePosition(hit) - readNucleotideIndex;
|
|
696
|
+
else
|
|
697
|
+
(*coords)[readNucleotideIndex] = getKmerOccurencePosition(hit) + readNucleotideIndex - getLength(tString) + 1;
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
// Barrier to flip-flopping
|
|
701
|
+
if ((*sequenceIDs)[readNucleotideIndex - 1] != 0
|
|
702
|
+
&& ((*sequenceIDs)[readNucleotideIndex] != (*sequenceIDs)[readNucleotideIndex - 1]
|
|
703
|
+
|| (*coords)[readNucleotideIndex] != (*coords)[readNucleotideIndex - 1])) {
|
|
704
|
+
// Break in continuity... skip k positions
|
|
705
|
+
(*sequenceIDs)[readNucleotideIndex] = 0;
|
|
706
|
+
(*coords)[readNucleotideIndex] = -1;
|
|
707
|
+
readNucleotideIndex++;
|
|
708
|
+
|
|
709
|
+
for (penalty = 0; penalty < table->WORDLENGTH - 1 && readNucleotideIndex < getLength(tString); penalty++) {
|
|
710
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
711
|
+
pushNucleotide(&word, nucleotide);
|
|
712
|
+
|
|
713
|
+
#ifdef COLOR
|
|
714
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
715
|
+
#else
|
|
716
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
717
|
+
#endif
|
|
718
|
+
(*sequenceIDs)[readNucleotideIndex] = 0;
|
|
719
|
+
(*coords)[readNucleotideIndex] = -1;
|
|
720
|
+
readNucleotideIndex++;
|
|
721
|
+
}
|
|
722
|
+
} else
|
|
723
|
+
readNucleotideIndex++;
|
|
724
|
+
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
free(mapReferenceIDs);
|
|
728
|
+
free(mapCoords);
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
void inputSequenceIntoSplayTable(TightString * array,
|
|
732
|
+
SplayTable * table,
|
|
733
|
+
FILE * file,
|
|
734
|
+
boolean second_in_pair,
|
|
735
|
+
IDnum * mapReferenceIDs, Coordinate * mapCoords, Coordinate mapCount,
|
|
736
|
+
IDnum seqID)
|
|
737
|
+
{
|
|
738
|
+
IDnum * sequenceIDs = NULL;
|
|
739
|
+
Coordinate * coords = NULL;
|
|
740
|
+
|
|
741
|
+
//debug = (seqID == 29405);
|
|
742
|
+
|
|
743
|
+
// If appropriate, get the HSPs on reference sequences
|
|
744
|
+
if (table->kmerOccurenceTable)
|
|
745
|
+
computeClearHSPs(array, second_in_pair, table, &sequenceIDs, &coords, mapReferenceIDs, mapCoords, mapCount, seqID);
|
|
746
|
+
|
|
747
|
+
// Go through read, eventually with annotations
|
|
748
|
+
printAnnotations(sequenceIDs, coords, array, table, file, second_in_pair, seqID);
|
|
749
|
+
|
|
750
|
+
// Clean up
|
|
751
|
+
if (sequenceIDs) {
|
|
752
|
+
free(sequenceIDs);
|
|
753
|
+
free(coords);
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
void inputReferenceIntoSplayTable(TightString * tString,
|
|
758
|
+
SplayTable * table, FILE * file, IDnum seqID, Mask * mask)
|
|
759
|
+
{
|
|
760
|
+
IDnum currentIndex;
|
|
761
|
+
Coordinate readNucleotideIndex = 0;
|
|
762
|
+
Coordinate kmerIndex = 0;
|
|
763
|
+
Kmer word;
|
|
764
|
+
Kmer antiWord;
|
|
765
|
+
Nucleotide nucleotide;
|
|
766
|
+
Mask * currentMask = mask;
|
|
767
|
+
#ifdef _OPENMP
|
|
768
|
+
char lineBuffer[MAXLINE];
|
|
769
|
+
#endif
|
|
770
|
+
|
|
771
|
+
clearKmer(&word);
|
|
772
|
+
clearKmer(&antiWord);
|
|
773
|
+
|
|
774
|
+
currentIndex = seqID;
|
|
775
|
+
#ifdef _OPENMP
|
|
776
|
+
sprintf(lineBuffer, "ROADMAP %li\n", (long)currentIndex);
|
|
777
|
+
appendLine(lineBuffer, omp_get_thread_num());
|
|
778
|
+
#else
|
|
779
|
+
velvetFprintf(file, "ROADMAP %li\n", (long)currentIndex);
|
|
780
|
+
#endif
|
|
781
|
+
|
|
782
|
+
// Neglect any string shorter than WORDLENGTH :
|
|
783
|
+
if (getLength(tString) < table->WORDLENGTH) {
|
|
784
|
+
return;
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
// Fill in the initial word :
|
|
788
|
+
for (readNucleotideIndex = 0;
|
|
789
|
+
readNucleotideIndex < table->WORDLENGTH - 1;
|
|
790
|
+
readNucleotideIndex++) {
|
|
791
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
792
|
+
pushNucleotide(&word, nucleotide);
|
|
793
|
+
if (table->double_strand) {
|
|
794
|
+
#ifdef COLOR
|
|
795
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
796
|
+
#else
|
|
797
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
798
|
+
#endif
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
while (readNucleotideIndex < getLength(tString)) {
|
|
803
|
+
// Shift word:
|
|
804
|
+
nucleotide = getNucleotide(readNucleotideIndex, tString);
|
|
805
|
+
pushNucleotide(&word, nucleotide);
|
|
806
|
+
|
|
807
|
+
if (table->double_strand) {
|
|
808
|
+
#ifdef COLOR
|
|
809
|
+
reversePushNucleotide(&antiWord, nucleotide);
|
|
810
|
+
#else
|
|
811
|
+
reversePushNucleotide(&antiWord, 3 - nucleotide);
|
|
812
|
+
#endif
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
// Check for gap masks:
|
|
816
|
+
if (currentMask && currentMask->start - table->WORDLENGTH + 1 <= readNucleotideIndex) {
|
|
817
|
+
while(currentMask && currentMask->finish + table->WORDLENGTH - 1 < readNucleotideIndex)
|
|
818
|
+
currentMask = currentMask->next;
|
|
819
|
+
|
|
820
|
+
if (currentMask && currentMask->finish + table->WORDLENGTH - 1 >= readNucleotideIndex) {
|
|
821
|
+
readNucleotideIndex++;
|
|
822
|
+
kmerIndex++;
|
|
823
|
+
continue;
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
// Record k-mer
|
|
828
|
+
if (table->double_strand) {
|
|
829
|
+
if (compareKmers(&word, &antiWord) <= 0)
|
|
830
|
+
recordKmerOccurence(&word, currentIndex,
|
|
831
|
+
kmerIndex,
|
|
832
|
+
table->kmerOccurenceTable);
|
|
833
|
+
else
|
|
834
|
+
recordKmerOccurence(&antiWord, -currentIndex,
|
|
835
|
+
kmerIndex,
|
|
836
|
+
table->kmerOccurenceTable);
|
|
837
|
+
} else {
|
|
838
|
+
recordKmerOccurence(&word, currentIndex,
|
|
839
|
+
kmerIndex,
|
|
840
|
+
table->kmerOccurenceTable);
|
|
841
|
+
}
|
|
842
|
+
readNucleotideIndex++;
|
|
843
|
+
kmerIndex++;
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
return;
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
static Coordinate countReferenceKmers(ReadSet * reads, int wordLength) {
|
|
850
|
+
IDnum readIndex;
|
|
851
|
+
Coordinate length = 0;
|
|
852
|
+
|
|
853
|
+
|
|
854
|
+
for (readIndex = 0; readIndex < reads->readCount && reads->categories[readIndex] == REFERENCE; readIndex++)
|
|
855
|
+
{
|
|
856
|
+
Coordinate tmpLength = getLength(getTightStringInArray(reads->tSequences, readIndex));
|
|
857
|
+
if (tmpLength >= wordLength)
|
|
858
|
+
length += tmpLength - wordLength + 1;
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
return length;
|
|
862
|
+
}
|
|
863
|
+
|
|
864
|
+
Mask ** scanReferenceSequences(FILE * file, IDnum referenceSequenceCount) {
|
|
865
|
+
Mask ** referenceMasks = callocOrExit(referenceSequenceCount, Mask*);
|
|
866
|
+
IDnum index;
|
|
867
|
+
char line[MAXLINE];
|
|
868
|
+
char c = '\0';
|
|
869
|
+
|
|
870
|
+
// Search sequences for masks
|
|
871
|
+
for (index = 0; index < referenceSequenceCount; index++) {
|
|
872
|
+
Mask * current = NULL;
|
|
873
|
+
Coordinate position = 0;
|
|
874
|
+
boolean openMask = false;
|
|
875
|
+
|
|
876
|
+
// Read through header
|
|
877
|
+
fgets(line, MAXLINE, file);
|
|
878
|
+
|
|
879
|
+
// Read through sequence
|
|
880
|
+
while ((c = getc(file))) {
|
|
881
|
+
if (c == EOF || c == '>')
|
|
882
|
+
break;
|
|
883
|
+
else if (c == '\r' || c == '\n')
|
|
884
|
+
continue;
|
|
885
|
+
else if (c == 'n' || c == 'N') {
|
|
886
|
+
if (openMask)
|
|
887
|
+
current->finish++;
|
|
888
|
+
else if (referenceMasks[index] == NULL) {
|
|
889
|
+
referenceMasks[index] = newMask(position);
|
|
890
|
+
current = referenceMasks[index];
|
|
891
|
+
} else {
|
|
892
|
+
current->next = newMask(position);
|
|
893
|
+
current = current->next;
|
|
894
|
+
}
|
|
895
|
+
openMask = true;
|
|
896
|
+
position++;
|
|
897
|
+
} else {
|
|
898
|
+
openMask = false;
|
|
899
|
+
position++;
|
|
900
|
+
}
|
|
901
|
+
}
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
if (c != '\0')
|
|
905
|
+
ungetc(c, file);
|
|
906
|
+
return referenceMasks;
|
|
907
|
+
}
|
|
908
|
+
|
|
909
|
+
Mask ** scanBinaryReferenceSequences(SequencesReader *seqReadInfo, IDnum referenceSequenceCount) {
|
|
910
|
+
Mask ** referenceMasks = callocOrExit(referenceSequenceCount, Mask*);
|
|
911
|
+
IDnum index;
|
|
912
|
+
char line[MAXLINE];
|
|
913
|
+
char c = '\0';
|
|
914
|
+
FILE *file = fopen(seqReadInfo->m_namesFilename, "r");
|
|
915
|
+
if (file == NULL) {
|
|
916
|
+
exitErrorf(EXIT_FAILURE, true, "Couldn't read file %s", seqReadInfo->m_namesFilename);
|
|
917
|
+
} else {
|
|
918
|
+
velvetLog("Reading mapping info from %s\n", seqReadInfo->m_namesFilename);
|
|
919
|
+
}
|
|
920
|
+
|
|
921
|
+
// Search sequences for masks
|
|
922
|
+
for (index = 0; index < referenceSequenceCount; index++) {
|
|
923
|
+
Mask * current = NULL;
|
|
924
|
+
long start = 0;
|
|
925
|
+
long finish = 0;
|
|
926
|
+
long number;
|
|
927
|
+
long cat;
|
|
928
|
+
|
|
929
|
+
// Read through header
|
|
930
|
+
if ((c = getc(file)) != '>') {
|
|
931
|
+
exitErrorf(EXIT_FAILURE, false, "names line did not start with >");
|
|
932
|
+
}
|
|
933
|
+
fgets(line, MAXLINE, file);
|
|
934
|
+
sscanf(line, "%*[^\t]\t%li\t%li\n", &number, &cat);
|
|
935
|
+
// ensure is is a ref cat
|
|
936
|
+
if ((IDnum) number != index + 1) {
|
|
937
|
+
exitErrorf(EXIT_FAILURE, false, "sequence %ld != expected %ld", number, (long) index);
|
|
938
|
+
}
|
|
939
|
+
if ((Category) cat != REFERENCE) {
|
|
940
|
+
exitErrorf(EXIT_FAILURE, false, "unexpected category %ld", cat);
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
// Read through the reference maps
|
|
944
|
+
while ((c = getc(file))) {
|
|
945
|
+
if (c == EOF || c == '>') {
|
|
946
|
+
break;
|
|
947
|
+
}
|
|
948
|
+
ungetc(c, file);
|
|
949
|
+
fgets(line, MAXLINE, file);
|
|
950
|
+
sscanf(line, "%li\t%li\n", &start, &finish);
|
|
951
|
+
if (referenceMasks[index] == NULL) {
|
|
952
|
+
referenceMasks[index] = newMask(start);
|
|
953
|
+
referenceMasks[index]->finish = finish;
|
|
954
|
+
current = referenceMasks[index];
|
|
955
|
+
} else {
|
|
956
|
+
current->next = newMask(start);
|
|
957
|
+
current->next->finish = finish;
|
|
958
|
+
current = current->next;
|
|
959
|
+
}
|
|
960
|
+
}
|
|
961
|
+
ungetc(c, file);
|
|
962
|
+
}
|
|
963
|
+
|
|
964
|
+
fclose(file);
|
|
965
|
+
return referenceMasks;
|
|
966
|
+
}
|
|
967
|
+
|
|
968
|
+
void inputSequenceArrayIntoSplayTableAndArchive(ReadSet * reads,
|
|
969
|
+
SplayTable * table,
|
|
970
|
+
char *filename, char* seqFilename)
|
|
971
|
+
{
|
|
972
|
+
IDnum index;
|
|
973
|
+
IDnum sequenceCount = reads->readCount;
|
|
974
|
+
TightString *array;
|
|
975
|
+
FILE *outfile = fopen(filename, "w");
|
|
976
|
+
FILE *seqFile = NULL;
|
|
977
|
+
IDnum kmerCount;
|
|
978
|
+
IDnum referenceSequenceCount = 0;
|
|
979
|
+
struct timeval start, end, diff;
|
|
980
|
+
SequencesReader seqReadInfo;
|
|
981
|
+
memset(&seqReadInfo, 0, sizeof(seqReadInfo));
|
|
982
|
+
if (isCreateBinary()) {
|
|
983
|
+
seqReadInfo.m_bIsBinary = true;
|
|
984
|
+
seqReadInfo.m_pFile = openCnySeqForRead(seqFilename, &seqReadInfo.m_unifiedSeqFileHeader);
|
|
985
|
+
if (!seqReadInfo.m_pFile) {
|
|
986
|
+
exitErrorf(EXIT_FAILURE, true, "Could not open %s", seqFilename);
|
|
987
|
+
}
|
|
988
|
+
seqReadInfo.m_namesFilename = mallocOrExit(strlen(seqFilename) + sizeof(".names"), char);
|
|
989
|
+
sprintf(seqReadInfo.m_namesFilename, "%s.names", seqFilename);
|
|
990
|
+
seqReadInfo.m_numCategories = seqReadInfo.m_unifiedSeqFileHeader.m_numCategories;
|
|
991
|
+
seqReadInfo.m_minSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_minSeqLen;
|
|
992
|
+
seqReadInfo.m_maxSeqLen = seqReadInfo.m_unifiedSeqFileHeader.m_maxSeqLen;
|
|
993
|
+
seqReadInfo.m_bIsRef = false;
|
|
994
|
+
seqReadInfo.m_pReadBuffer = mallocOrExit(USF_READ_BUF_SIZE, uint8_t );
|
|
995
|
+
seqReadInfo.m_pCurrentReadPtr = seqReadInfo.m_pReadBufEnd = 0;
|
|
996
|
+
|
|
997
|
+
resetCnySeqCurrentRead(&seqReadInfo);
|
|
998
|
+
} else {
|
|
999
|
+
seqReadInfo.m_bIsBinary = false;
|
|
1000
|
+
}
|
|
1001
|
+
IDnum ** mapReferenceIDs = NULL;
|
|
1002
|
+
Coordinate ** mapCoords = NULL;
|
|
1003
|
+
Coordinate * mapCount = NULL;
|
|
1004
|
+
|
|
1005
|
+
char line[MAXLINE];
|
|
1006
|
+
char c;
|
|
1007
|
+
IDnum seqID = 0;
|
|
1008
|
+
long long_var;
|
|
1009
|
+
long long longlong_var;
|
|
1010
|
+
Coordinate maxCount = 20;
|
|
1011
|
+
Coordinate counter = 0;
|
|
1012
|
+
// DEBUG
|
|
1013
|
+
Mask ** referenceMasks = NULL;
|
|
1014
|
+
|
|
1015
|
+
if (outfile == NULL)
|
|
1016
|
+
exitErrorf(EXIT_FAILURE, true, "Couldn't write to file %s", filename);
|
|
1017
|
+
else
|
|
1018
|
+
velvetLog("Writing into roadmap file %s...\n", filename);
|
|
1019
|
+
|
|
1020
|
+
// Count reference sequences
|
|
1021
|
+
for (index = 0; index < reads->readCount && reads->categories[index] == REFERENCE; index++)
|
|
1022
|
+
referenceSequenceCount++;
|
|
1023
|
+
|
|
1024
|
+
velvetFprintf(outfile, "%ld\t%ld\t%i\t%hi\n", (long) sequenceCount, (long) referenceSequenceCount, table->WORDLENGTH, (short) table->double_strand);
|
|
1025
|
+
|
|
1026
|
+
if (reads->tSequences == NULL)
|
|
1027
|
+
convertSequences(reads);
|
|
1028
|
+
|
|
1029
|
+
gettimeofday(&start, NULL);
|
|
1030
|
+
array = reads->tSequences;
|
|
1031
|
+
|
|
1032
|
+
#ifdef _OPENMP
|
|
1033
|
+
if (omp_get_max_threads() == 1)
|
|
1034
|
+
{
|
|
1035
|
+
omp_set_num_threads(2);
|
|
1036
|
+
omp_set_nested(0);
|
|
1037
|
+
}
|
|
1038
|
+
else
|
|
1039
|
+
omp_set_nested(1);
|
|
1040
|
+
initAnnotationBuffers();
|
|
1041
|
+
#else
|
|
1042
|
+
annotationBuffer = newStringBuffer(BUFFER_SIZE);
|
|
1043
|
+
#endif
|
|
1044
|
+
|
|
1045
|
+
if (referenceSequenceCount && (kmerCount = countReferenceKmers(reads, table->WORDLENGTH)) > 0) {
|
|
1046
|
+
table->kmerOccurenceTable = newKmerOccurenceTable(24 , table->WORDLENGTH);
|
|
1047
|
+
allocateKmerOccurences(kmerCount, table->kmerOccurenceTable);
|
|
1048
|
+
if (seqReadInfo.m_bIsBinary) {
|
|
1049
|
+
referenceMasks = scanBinaryReferenceSequences(&seqReadInfo, referenceSequenceCount);
|
|
1050
|
+
// binary seqs have no Ns so just advance past the references
|
|
1051
|
+
for (index = 0; index < referenceSequenceCount; index++) {
|
|
1052
|
+
TightString cmpString;
|
|
1053
|
+
cmpString.length = seqReadInfo.m_currentReadLength;
|
|
1054
|
+
cmpString.sequence = mallocOrExit((seqReadInfo.m_currentReadLength + 3) / 4, uint8_t );
|
|
1055
|
+
getCnySeqNucl(&seqReadInfo, cmpString.sequence);
|
|
1056
|
+
if (seqReadInfo.m_bIsRef) {
|
|
1057
|
+
seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
|
|
1058
|
+
// now the next ptr is advanced
|
|
1059
|
+
seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
|
|
1060
|
+
RefInfo refElem;
|
|
1061
|
+
uint32_t refIdx;
|
|
1062
|
+
for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
|
|
1063
|
+
// not actually used so just read past refs
|
|
1064
|
+
refElem.m_referenceID = readCnySeqUint32(&seqReadInfo);
|
|
1065
|
+
refElem.m_pos = readCnySeqUint32(&seqReadInfo);
|
|
1066
|
+
}
|
|
1067
|
+
}
|
|
1068
|
+
// optional test to ensure reference mapping seqIDs are in sync
|
|
1069
|
+
#if 0
|
|
1070
|
+
TightString *tString;
|
|
1071
|
+
tString = getTightStringInArray(array, index);
|
|
1072
|
+
if (getLength(tString) != seqReadInfo.m_currentReadLength) {
|
|
1073
|
+
velvetLog("Error: TightString len mismatch, %d != %ld\n", getLength(tString), seqReadInfo.m_currentReadLength);
|
|
1074
|
+
exit(1);
|
|
1075
|
+
}
|
|
1076
|
+
char *str = readTightString(tString);
|
|
1077
|
+
char *cmpStr = readTightString(&cmpString);
|
|
1078
|
+
if (strcmp(str, cmpStr) != 0) {
|
|
1079
|
+
printf("seq %s != cmp %s\n", str, cmpStr);
|
|
1080
|
+
exit(1);
|
|
1081
|
+
}
|
|
1082
|
+
free(str);
|
|
1083
|
+
free(cmpStr);
|
|
1084
|
+
#endif
|
|
1085
|
+
advanceCnySeqCurrentRead(&seqReadInfo);
|
|
1086
|
+
free(cmpString.sequence);
|
|
1087
|
+
}
|
|
1088
|
+
} else {
|
|
1089
|
+
seqFile = fopen(seqFilename, "r");
|
|
1090
|
+
|
|
1091
|
+
if (seqFile == NULL)
|
|
1092
|
+
exitErrorf(EXIT_FAILURE, true, "Couldn't write to file %s", seqFilename);
|
|
1093
|
+
else
|
|
1094
|
+
velvetLog("Reading mapping info from file %s\n", seqFilename);
|
|
1095
|
+
|
|
1096
|
+
seqReadInfo.m_pFile = seqFile;
|
|
1097
|
+
|
|
1098
|
+
// Skip through reference headers quickly
|
|
1099
|
+
referenceMasks = scanReferenceSequences(seqFile, referenceSequenceCount);
|
|
1100
|
+
}
|
|
1101
|
+
|
|
1102
|
+
#ifdef _OPENMP
|
|
1103
|
+
producing = 1;
|
|
1104
|
+
#pragma omp parallel sections
|
|
1105
|
+
{
|
|
1106
|
+
#pragma omp section
|
|
1107
|
+
{
|
|
1108
|
+
bufferWritter(outfile);
|
|
1109
|
+
}
|
|
1110
|
+
#pragma omp section
|
|
1111
|
+
{
|
|
1112
|
+
#pragma omp parallel for
|
|
1113
|
+
#endif
|
|
1114
|
+
for (index = 0; index < referenceSequenceCount; index++)
|
|
1115
|
+
inputReferenceIntoSplayTable(getTightStringInArray(array, index),
|
|
1116
|
+
table, outfile, index + 1, referenceMasks[index]);
|
|
1117
|
+
|
|
1118
|
+
#ifdef _OPENMP
|
|
1119
|
+
for (index = omp_get_max_threads() - 1; index >= 0; index--)
|
|
1120
|
+
pushBufferCommit(index);
|
|
1121
|
+
producing = 0;
|
|
1122
|
+
#pragma omp flush(producing)
|
|
1123
|
+
}
|
|
1124
|
+
}
|
|
1125
|
+
#endif
|
|
1126
|
+
|
|
1127
|
+
if (maskMemory)
|
|
1128
|
+
destroyRecycleBin(maskMemory);
|
|
1129
|
+
maskMemory = NULL;
|
|
1130
|
+
sortKmerOccurenceTable(table->kmerOccurenceTable);
|
|
1131
|
+
}
|
|
1132
|
+
|
|
1133
|
+
velvetLog("Inputting sequences...\n");
|
|
1134
|
+
|
|
1135
|
+
if (table->kmerOccurenceTable) {
|
|
1136
|
+
mapReferenceIDs = callocOrExit(sequenceCount + 1, IDnum*);
|
|
1137
|
+
mapCoords = callocOrExit(sequenceCount + 1, Coordinate *);
|
|
1138
|
+
mapCount = callocOrExit(sequenceCount + 1, Coordinate);
|
|
1139
|
+
|
|
1140
|
+
RefInfo *refArray = NULL;
|
|
1141
|
+
if (seqReadInfo.m_bIsBinary) {
|
|
1142
|
+
TightString cmpString;
|
|
1143
|
+
for (seqID = referenceSequenceCount + 1; seqID < sequenceCount + 1; seqID++) {
|
|
1144
|
+
cmpString.length = seqReadInfo.m_currentReadLength;
|
|
1145
|
+
cmpString.sequence = mallocOrExit((seqReadInfo.m_currentReadLength + 3) / 4, uint8_t );
|
|
1146
|
+
getCnySeqNucl(&seqReadInfo, cmpString.sequence);
|
|
1147
|
+
if (seqReadInfo.m_bIsRef) {
|
|
1148
|
+
seqReadInfo.m_refCnt = readCnySeqUint32(&seqReadInfo);
|
|
1149
|
+
// now the next ptr is advanced
|
|
1150
|
+
seqReadInfo.m_pNextReadPtr += (sizeof(RefInfo) * seqReadInfo.m_refCnt);
|
|
1151
|
+
refArray = callocOrExit(seqReadInfo.m_refCnt, RefInfo);
|
|
1152
|
+
uint32_t refIdx;
|
|
1153
|
+
for (refIdx = 0; refIdx < seqReadInfo.m_refCnt; refIdx++) {
|
|
1154
|
+
refArray[refIdx].m_referenceID = readCnySeqUint32(&seqReadInfo);
|
|
1155
|
+
refArray[refIdx].m_pos = readCnySeqUint32(&seqReadInfo);
|
|
1156
|
+
}
|
|
1157
|
+
}
|
|
1158
|
+
// optional test to ensure reference mapping seqIDs are in sync
|
|
1159
|
+
#if 0
|
|
1160
|
+
TightString *tString;
|
|
1161
|
+
tString = getTightStringInArray(array, seqID - 1);
|
|
1162
|
+
if (getLength(tString) != seqReadInfo.m_currentReadLength) {
|
|
1163
|
+
velvetLog("Error: TightString len mismatch, %d != %ld\n", getLength(tString), seqReadInfo.m_currentReadLength);
|
|
1164
|
+
exit(1);
|
|
1165
|
+
}
|
|
1166
|
+
char *str = readTightString(tString);
|
|
1167
|
+
char *cmpStr = readTightString(&cmpString);
|
|
1168
|
+
if (strcmp(str, cmpStr) != 0) {
|
|
1169
|
+
printf("seq %s != cmp %s\n", str, cmpStr);
|
|
1170
|
+
exit(1);
|
|
1171
|
+
}
|
|
1172
|
+
free(str);
|
|
1173
|
+
free(cmpStr);
|
|
1174
|
+
#endif
|
|
1175
|
+
free(cmpString.sequence);
|
|
1176
|
+
|
|
1177
|
+
// set prior count
|
|
1178
|
+
mapCount[seqID - 1] = counter;
|
|
1179
|
+
counter = 0;
|
|
1180
|
+
maxCount = 20;
|
|
1181
|
+
mapReferenceIDs[seqID] = callocOrExit(maxCount, IDnum);
|
|
1182
|
+
mapCoords[seqID] = callocOrExit(maxCount, Coordinate);
|
|
1183
|
+
|
|
1184
|
+
if (seqReadInfo.m_bIsRef) {
|
|
1185
|
+
while (counter < seqReadInfo.m_refCnt) {
|
|
1186
|
+
mapReferenceIDs[seqID][counter] = (IDnum) refArray[counter].m_referenceID;
|
|
1187
|
+
mapCoords[seqID][counter] = (Coordinate) refArray[counter].m_pos;
|
|
1188
|
+
|
|
1189
|
+
if (++counter == maxCount) {
|
|
1190
|
+
maxCount *= 2;
|
|
1191
|
+
mapReferenceIDs[seqID] = reallocOrExit(mapReferenceIDs[seqID], maxCount, IDnum);
|
|
1192
|
+
mapCoords[seqID] = reallocOrExit(mapCoords[seqID], maxCount, Coordinate);
|
|
1193
|
+
}
|
|
1194
|
+
}
|
|
1195
|
+
free(refArray);
|
|
1196
|
+
}
|
|
1197
|
+
advanceCnySeqCurrentRead(&seqReadInfo);
|
|
1198
|
+
}
|
|
1199
|
+
} else {
|
|
1200
|
+
// Parse file for mapping info
|
|
1201
|
+
while (seqFile && (c = getc(seqFile)) != EOF) {
|
|
1202
|
+
|
|
1203
|
+
if (c == '>') {
|
|
1204
|
+
mapCount[seqID] = counter;
|
|
1205
|
+
counter = 0;
|
|
1206
|
+
maxCount = 20;
|
|
1207
|
+
fgets(line, MAXLINE, seqFile);
|
|
1208
|
+
sscanf(line,"%*[^\t]\t%li\t", &long_var);
|
|
1209
|
+
seqID = (IDnum) long_var;
|
|
1210
|
+
mapReferenceIDs[seqID] = callocOrExit(maxCount, IDnum);
|
|
1211
|
+
mapCoords[seqID] = callocOrExit(maxCount, Coordinate);
|
|
1212
|
+
} else if (c == 'M') {
|
|
1213
|
+
fgets(line, MAXLINE, seqFile);
|
|
1214
|
+
sscanf(line,"\t%li\t%lli\n", &long_var, &longlong_var);
|
|
1215
|
+
mapReferenceIDs[seqID][counter] = (IDnum) long_var;
|
|
1216
|
+
mapCoords[seqID][counter] = (Coordinate) longlong_var;
|
|
1217
|
+
|
|
1218
|
+
if (++counter == maxCount) {
|
|
1219
|
+
maxCount *= 2;
|
|
1220
|
+
mapReferenceIDs[seqID] = reallocOrExit(mapReferenceIDs[seqID], maxCount, IDnum);
|
|
1221
|
+
mapCoords[seqID] = reallocOrExit(mapCoords[seqID], maxCount, Coordinate);
|
|
1222
|
+
}
|
|
1223
|
+
}
|
|
1224
|
+
}
|
|
1225
|
+
}
|
|
1226
|
+
}
|
|
1227
|
+
if (seqFile)
|
|
1228
|
+
fclose(seqFile);
|
|
1229
|
+
|
|
1230
|
+
if (seqReadInfo.m_bIsBinary) {
|
|
1231
|
+
if (seqReadInfo.m_pReadBuffer) {
|
|
1232
|
+
free(seqReadInfo.m_pReadBuffer);
|
|
1233
|
+
}
|
|
1234
|
+
fclose(seqReadInfo.m_pFile);
|
|
1235
|
+
}
|
|
1236
|
+
|
|
1237
|
+
#ifdef _OPENMP
|
|
1238
|
+
producing = 1;
|
|
1239
|
+
#pragma omp parallel sections
|
|
1240
|
+
{
|
|
1241
|
+
#pragma omp section
|
|
1242
|
+
{
|
|
1243
|
+
bufferWritter(outfile);
|
|
1244
|
+
}
|
|
1245
|
+
#pragma omp section
|
|
1246
|
+
{
|
|
1247
|
+
#pragma omp parallel for
|
|
1248
|
+
#endif
|
|
1249
|
+
for (index = referenceSequenceCount; index < sequenceCount; index++)
|
|
1250
|
+
{
|
|
1251
|
+
boolean second_in_pair;
|
|
1252
|
+
|
|
1253
|
+
// Progress report on screen
|
|
1254
|
+
if (index % 1000000 == 0) {
|
|
1255
|
+
velvetLog("Inputting sequence %li / %li\n",
|
|
1256
|
+
(long)index, (long)sequenceCount);
|
|
1257
|
+
fflush(stdout);
|
|
1258
|
+
}
|
|
1259
|
+
|
|
1260
|
+
// Test to make sure that all the reference reads are before all the other reads
|
|
1261
|
+
if (reads->categories[index] == REFERENCE) {
|
|
1262
|
+
velvetLog("Reference sequence placed after a non-reference read!\n");
|
|
1263
|
+
velvetLog(">> Please re-order the filenames in your command line so as "
|
|
1264
|
+
"to have the reference sequence files before all the others\n");
|
|
1265
|
+
#ifdef DEBUG
|
|
1266
|
+
abort();
|
|
1267
|
+
#endif
|
|
1268
|
+
exit(0);
|
|
1269
|
+
}
|
|
1270
|
+
second_in_pair = reads->categories[index] % 2 && isSecondInPair(reads, index);
|
|
1271
|
+
|
|
1272
|
+
// Hashing the reads
|
|
1273
|
+
if (table->kmerOccurenceTable)
|
|
1274
|
+
inputSequenceIntoSplayTable(array, table,
|
|
1275
|
+
outfile,
|
|
1276
|
+
second_in_pair, mapReferenceIDs[index + 1], mapCoords[index+1], mapCount[index+1], index + 1);
|
|
1277
|
+
else
|
|
1278
|
+
inputSequenceIntoSplayTable(array, table,
|
|
1279
|
+
outfile,
|
|
1280
|
+
second_in_pair, NULL, NULL, 0, index + 1);
|
|
1281
|
+
}
|
|
1282
|
+
#ifdef _OPENMP
|
|
1283
|
+
for (index = omp_get_max_threads() - 1; index >= 0; index--)
|
|
1284
|
+
pushBufferCommit(index);
|
|
1285
|
+
producing = 0;
|
|
1286
|
+
#pragma omp flush(producing)
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1289
|
+
destroyAnnotationBuffers();
|
|
1290
|
+
#else
|
|
1291
|
+
destroyStringBuffer(annotationBuffer, 1);
|
|
1292
|
+
#endif
|
|
1293
|
+
|
|
1294
|
+
gettimeofday(&end, NULL);
|
|
1295
|
+
timersub(&end, &start, &diff);
|
|
1296
|
+
velvetLog(" === Sequences loaded in %ld.%06ld s\n", (long) diff.tv_sec, (long) diff.tv_usec);
|
|
1297
|
+
|
|
1298
|
+
fclose(outfile);
|
|
1299
|
+
|
|
1300
|
+
if (mapReferenceIDs) {
|
|
1301
|
+
free(mapReferenceIDs);
|
|
1302
|
+
free(mapCoords);
|
|
1303
|
+
free(mapCount);
|
|
1304
|
+
}
|
|
1305
|
+
if (referenceMasks) {
|
|
1306
|
+
free(referenceMasks);
|
|
1307
|
+
}
|
|
1308
|
+
if (seqReadInfo.m_namesFilename) {
|
|
1309
|
+
free(seqReadInfo.m_namesFilename);
|
|
1310
|
+
}
|
|
1311
|
+
//free(reads->tSequences);
|
|
1312
|
+
//reads->tSequences = NULL;
|
|
1313
|
+
//destroyReadSet(reads);
|
|
1314
|
+
velvetLog("Done inputting sequences\n");
|
|
1315
|
+
}
|