finishm 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
@@ -0,0 +1,261 @@
|
|
1
|
+
class Bio::AssemblyGraphAlgorithms::SingleCoherentWanderer
|
2
|
+
include Bio::FinishM::Logging
|
3
|
+
|
4
|
+
# Like AcyclicConnectionFinder#depth_first_search_with_leash except use
# single read recoherence. The algorithm used is a generalisation of Dijkstra's
# shortest path algorithm, where instead of keeping track of the minimum
# distance to each node, the algorithm keeps track of the distance to a
# set of nodes long enough to invoke a recoherence kmer.
#
# Parameters:
# finishm_graph:: graph object providing probe_nodes, probe_node_directions,
#                 initial_path_from_probe and the underlying velvet graph
# leash_length:: head node sets farther than this from the probe are not extended
# recoherence_kmer:: kmer length used for trimming head node sets and for the
#                    recoherence test applied at forks
# sequence_hash:: read sequences handed to the recoherence test
#
# Options:
# :max_explore_nodes: maximum number of nodes to explore from each node. If max is reached, don't make any connections (default: no maximum)
#
# Returns a Hash of [start_probe_index, end_probe_index] => minimum distance,
# containing only the 'upper triangle' (end index >= start index) of pairs
# for which a connection was found.
def wander(finishm_graph, leash_length, recoherence_kmer, sequence_hash, options={})
  to_return = {}

  # Take the probes and make them all into finishing nodes, in the same
  # [node_id, first_side] form that is used to key node_to_head_node_sets below
  finishing_nodes = []
  finishm_graph.probe_nodes.each_with_index do |probe_node, probe_node_index|
    direction = finishm_graph.probe_node_directions[probe_node_index]
    first_side = if direction == true
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
    else
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
    end
    finishing_nodes.push [probe_node.node_id, first_side]
  end

  # Search from each probed node in the graph
  # TODO: is there a better way to implement this by somehow searching with
  # all probe nodes at once, rather than starting fresh with each probe?
  finishm_graph.probe_nodes.each_with_index do |probe_node, probe_node_index|

    # Don't explore from the last node, as no new connections are established
    next if probe_node_index == finishm_graph.probe_nodes.length - 1

    # Go all the way to the leash length,
    # and then search to see if any of the other nodes have been come across
    log.debug "Exploring from probe node \##{probe_node_index+1} (node #{probe_node.node_id}/#{finishm_graph.probe_node_directions[probe_node_index] })" if log.debug?
    pqueue = DS::AnyPriorityQueue.new {|a,b| a < b}
    initial = finishm_graph.initial_path_from_probe(probe_node_index)
    if initial.nil?
      log.warn "Unable to start searching from probe \##{probe_node_index+1}, because it was not found in the graph. Skipping."
      next
    end
    initial_distanced = DistancedOrientedNodeSet.new
    initial_distanced.oriented_trail = initial
    initial_distanced.distance = 0

    # The minimum distance found to get to the head nodes
    minimum_head_nodes_distances = {}
    # Which head node sets is each node connected to?
    node_to_head_node_sets = {}
    # for logging
    last_logged_node_count = 0
    maxed_out = false

    pqueue.enqueue initial_distanced, 0
    # While there are more node sets in the queue
    while distanced_head_nodes = pqueue.dequeue
      log.debug "Dequeued #{distanced_head_nodes}" if log.debug?
      if options[:max_explore_nodes] and node_to_head_node_sets.length > options[:max_explore_nodes]
        log.warn "Hit maximum number of nodes (#{options[:max_explore_nodes] }) while exploring from probe \##{probe_node_index+1}"
        maxed_out = true
        break
      end
      if log.info? and node_to_head_node_sets.length % 1024 == 0 and node_to_head_node_sets.length > last_logged_node_count
        if last_logged_node_count == 0
          log.info "While exploring from probe \##{probe_node_index+1}.."
        end
        log.info "So far worked with #{node_to_head_node_sets.length} distinct nodes in the assembly graph, at min distance #{distanced_head_nodes.distance}"
        last_logged_node_count = node_to_head_node_sets.length
      end

      # Computed once and reused as hash key and set member below
      settable = distanced_head_nodes.to_settable
      if minimum_head_nodes_distances.key?(settable) and
        distanced_head_nodes.distance >= minimum_head_nodes_distances[settable].distance
        # This node has already been explored, and no shorter path has been found here. Go no further.
        next
      end
      minimum_head_nodes_distances[settable] = distanced_head_nodes
      last_settable = distanced_head_nodes.oriented_trail.last.to_settable
      node_to_head_node_sets[last_settable] ||= Set.new
      node_to_head_node_sets[last_settable] << settable

      if distanced_head_nodes.distance <= leash_length
        # Still within the leash. Push into the queue all the current node's neighbours in the graph
        last = distanced_head_nodes.oriented_trail.last
        neighbour_onodes = finishm_graph.graph.neighbours_of(last.node, last.first_side)
        log.debug "Found #{neighbour_onodes.length} neighbours" if log.debug?
        if neighbour_onodes.length > 1
          # Fork detected. Apply recoherence, and only enqueue those that pass
          log.debug "Multiple neighbours found" if log.debug?
          neighbour_onodes.each do |neighbour|
            candidate = distanced_head_nodes.add_oriented_node_and_copy(neighbour, recoherence_kmer)
            log.debug "Testing recoherence in candidate #{candidate.oriented_trail.to_s}" if log.debug?
            if candidate.last_node_recoherent?(recoherence_kmer, sequence_hash)
              log.debug "Candidate survived recoherence: #{candidate.to_s}" if log.debug?
              pqueue.enqueue candidate, candidate.distance
            elsif log.debug?
              log.debug "Candidate did not survive recoherence #{candidate.oriented_trail.to_s}"
            end
          end
        else
          # One or none neighbours found. Enqueue if there is one
          neighbour_onodes.each do |neighbour|
            candidate = distanced_head_nodes.add_oriented_node_and_copy(neighbour, recoherence_kmer)
            pqueue.enqueue candidate, candidate.distance
          end
        end
      end
      # Otherwise we are beyond the leash, go no further
    end

    if maxed_out
      log.debug "Maxed out, exiting loop early" if log.debug?
      next
    end

    # Now have a hash of minimum distances. Now need to go through those and determine
    # which other nodes the current probe node is connected to
    finishm_graph.probe_nodes.each_with_index do |node, i|
      next if i < probe_node_index # only return the 'upper triangle' of the distance matrices

      heads = node_to_head_node_sets[finishing_nodes[i]]
      next if heads.nil? #no connection found

      # The single-node sanity check depends only on the two probes, not on
      # the head set, so it is evaluated once per probe pair.
      unless probes_on_single_node_ok?(finishm_graph, probe_node_index, i)
        #TODO: Possibly ok if contigs to be scaffolded are all on the same node. Unlikely in practice due to short tips, but still theoretically possible
        log.debug "Failed to verify that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?
        next #no connection found - the only connection was a fake one
      end
      log.debug "Verified that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?

      # There might be many head_sets that include the finishing node.
      # Which one has the least distance?
      overall_min_distanced_set = heads.collect do |head_set|
        minimum_head_nodes_distances[head_set]
      end.min_by{|distanced_set| distanced_set.distance}
      next if overall_min_distanced_set.nil?

      min_distance = overall_min_distanced_set.distance
      log.debug "Found a connection between probes #{probe_node_index+1} and #{i+1}, distance: #{min_distance}" if log.debug?
      to_return[[probe_node_index, i]] = min_distance
    end
  end
  return to_return
end
|
156
|
+
|
157
|
+
# Check for position and orientation if start and finish nodes are
# on the same velvet node. Return true if OK as below or if the nodes
# are different
# -->  <--- OK
# <--  -->  not ok (unless the node is circular)
# <--  <--  not ok
# -->  -->  not ok
#
# finishm_graph:: graph object providing probe_nodes, probe_node_directions,
#                 probe_node_reads, velvet_oriented_node and the velvet graph
# start_node_index, end_node_index:: indices of the two probes to compare
def probes_on_single_node_ok?(finishm_graph, start_node_index, end_node_index)
  node1 = finishm_graph.probe_nodes[start_node_index]
  node2 = finishm_graph.probe_nodes[end_node_index]
  return true if node1.node_id != node2.node_id

  node1_direction = finishm_graph.probe_node_directions[start_node_index]
  node2_direction = finishm_graph.probe_node_directions[end_node_index]
  node1_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node1, finishm_graph.probe_node_reads[start_node_index])
  node2_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node2, finishm_graph.probe_node_reads[end_node_index])
  log.debug "Validating for 1 node problems #{start_node_index}/#{end_node_index} #{node1_direction}/#{node2_direction} offsets #{node1_offset}/#{node2_offset}" if log.debug?

  # true/false and probe1 left of probe2, immediately below, is the most intuitive.
  # but false/true and probe1 right of probe2 is also valid
  if node1_direction == true and node2_direction == false and
    node1_offset < node2_offset
    return true
  end
  if node1_direction == false and node2_direction == true and
    node1_offset > node2_offset
    return true
  end

  # Probes face away from each other. This is still acceptable when the node
  # is circular, i.e. the node is one of its own neighbours. The neighbour
  # list holds node IDs, so it must be compared against the node's ID (not
  # the node object itself).
  if node1_direction == true and node2_direction == false
    onode = finishm_graph.velvet_oriented_node(start_node_index)
    neighbour_ids = finishm_graph.graph.neighbours_of(onode.node, onode.first_side).collect{|n| n.node_id}
    return true if neighbour_ids.include?(node1.node_id)
  end

  return false
end
|
196
|
+
|
197
|
+
private
|
198
|
+
# Offset of a noded read measured from the start of the node regardless of
# the read's direction: forward reads report their offset directly, reverse
# reads report it subtracted from the node's corresponding contig length.
# Raises if the read's direction is neither true nor false.
def direction_independent_offset_of_noded_read_from_start_of_node(velvet_node, velvet_noded_read)
  case velvet_noded_read.direction
  when true
    velvet_noded_read.offset_from_start_of_node
  when false
    velvet_node.corresponding_contig_length - velvet_noded_read.offset_from_start_of_node
  else
    raise "programming error - velvet_noded_read does not have valid direction"
  end
end
|
207
|
+
|
208
|
+
# A trail of oriented nodes (the 'head nodes') together with the distance of
# that trail from the origin of exploration
class DistancedOrientedNodeSet
  attr_accessor :oriented_trail, :distance

  # Flatten the trail into a plain Array of alternating node IDs and first
  # sides, so that two distinct objects describing the same trail compare
  # equal when used as Hash keys or Set members
  def to_settable
    flattened = []
    @oriented_trail.each { |onode| flattened.push(onode.node_id, onode.first_side) }
    flattened
  end

  # Return a new DistancedOrientedNodeSet consisting of this trail plus the
  # given oriented_node. Nodes at the rear of the trail are dropped once the
  # nodes kept (counting back from the new node) sum to at least
  # recoherence_kmer in length, since they are of no further use for
  # recoherence. The new object's distance is this object's distance plus
  # the length of the added node. This object is left untouched.
  def add_oriented_node_and_copy(oriented_node, recoherence_kmer)
    extended = @oriented_trail.trail + [oriented_node]

    # Walk backwards from the newly added node until enough sequence is covered
    keep_from = extended.length
    covered = 0
    while keep_from > 0 and covered < recoherence_kmer
      keep_from -= 1
      covered += extended[keep_from].node.length_alone
    end

    trimmed_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
    trimmed_trail.trail = extended[keep_from..-1]

    copy = DistancedOrientedNodeSet.new
    copy.oriented_trail = trimmed_trail
    copy.distance = @distance + oriented_node.node.length_alone
    copy
  end

  # Does the last node of the trail pass single read recoherence, as judged by
  # SingleCoherentPathsBetweenNodesFinder#validate_last_node_of_path_by_recoherence?
  # The finder is created on first use and shared between all instances.
  def last_node_recoherent?(recoherence_kmer, sequence_hash)
    @@single_recoherencer ||= Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new
    @@single_recoherencer.validate_last_node_of_path_by_recoherence(@oriented_trail, recoherence_kmer, sequence_hash)
  end

  # The trail's string form followed by the distance in brackets
  def to_s
    "#{@oriented_trail}(#{@distance})"
  end
end
|
261
|
+
end
|
@@ -0,0 +1,441 @@
|
|
1
|
+
require 'ds'
|
2
|
+
require 'set'
|
3
|
+
require 'ruby-progressbar'
|
4
|
+
|
5
|
+
class Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
|
6
|
+
include Bio::FinishM::Logging
|
7
|
+
|
8
|
+
DEFAULT_MAX_TIP_LENGTH = 200
|
9
|
+
DEFAULT_MIN_CONTIG_SIZE = 500
|
10
|
+
DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS = 2
|
11
|
+
|
12
|
+
attr_accessor :graph
|
13
|
+
|
14
|
+
ASSEMBLY_OPTIONS = [
|
15
|
+
:max_tip_length,
|
16
|
+
:recoherence_kmer,
|
17
|
+
:min_confirming_recoherence_kmer_reads,
|
18
|
+
:sequences,
|
19
|
+
:leash_length,
|
20
|
+
:min_contig_size,
|
21
|
+
:max_coverage_at_fork,
|
22
|
+
]
|
23
|
+
attr_accessor :assembly_options
|
24
|
+
|
25
|
+
# Create a new assembler given a velvet graph and a hash of assembly options.
# Missing :max_tip_length, :min_contig_size and
# :min_confirming_recoherence_kmer_reads entries are filled in (in the given
# hash) from the DEFAULT_* constants of this class.
#
# Assembly options:
# :max_tip_length: if a path is shorter than this in bp, then it will be clipped from the path (default DEFAULT_MAX_TIP_LENGTH)
# :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer (requires :sequences)
# :sequences: the sequences of the actual reads, probably a Bio::Velvet::Underground::BinarySequenceStore object
# :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
# :min_coverage_of_start_nodes: only start exploring from nodes with this much coverage
# :min_contig_size: don't bother returning contigs shorter than this (default DEFAULT_MIN_CONTIG_SIZE)
# :progressbar_io: given an IO object e.g. $stdout, write progress information
def initialize(graph, assembly_options={})
  @graph = graph
  @assembly_options = assembly_options
  {
    :max_tip_length => DEFAULT_MAX_TIP_LENGTH,
    :min_contig_size => DEFAULT_MIN_CONTIG_SIZE,
    :min_confirming_recoherence_kmer_reads => DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS,
  }.each do |option, default_value|
    @assembly_options[option] ||= default_value
  end
end
|
42
|
+
|
43
|
+
# Assemble everything in the graph into OrientedNodeTrail objects.
# Yields an OrientedNodeTrail if a block is
# given, otherwise returns an array of found paths (the returned array is
# empty when a block is given). Options for
# assembly are specified in assembly_options
def assemble
  paths = []

  # Gather a list of nodes to try starting from
  starting_nodes = gather_starting_nodes
  log.info "Found #{starting_nodes.length} nodes to attempt assembly from"

  # Settable forms of the oriented nodes that already belong to some path
  seen_nodes = Set.new
  progress = setup_progressbar starting_nodes.length

  # For each starting node, start the assembly process
  starting_nodes.each do |start_node|
    log.debug "Trying to assemble from #{start_node.node_id}" if log.debug?

    # If we've already covered this node, don't try it again
    if seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]) or
      seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST])
      log.debug "Already seen this node, not inspecting further" if log.debug?
      next
    end

    # first attempt to go forward as far as possible, then reverse the path
    # and continue until cannot go farther
    reversed_path_forward = find_beginning_trail_from_node(start_node, seen_nodes)
    if reversed_path_forward.nil?
      log.debug "Could not find forward path from this node, giving up" if log.debug?
      next
    end
    # Have we already seen this path before?
    #TODO: add in recoherence logic here
    if seen_last_in_path?(reversed_path_forward, seen_nodes)
      log.debug "Already seen the last node of the reversed path forward: #{reversed_path_forward.trail[-1].to_shorthand}, giving up" if log.debug?
      next
    end
    # Assemble ahead again
    log.debug "reversed_path_forward: #{reversed_path_forward.to_shorthand}" if log.debug?
    path, just_visited_onodes = assemble_from(reversed_path_forward)

    # Remove nodes that have already been seen to prevent duplication
    log.debug "Before removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, seen_nodes)
    log.debug "After removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?

    # Record which nodes have already been visited, so they aren't visited again
    seen_nodes.merge just_visited_onodes

    unless progress.nil?
      if @assembly_options[:min_coverage_of_start_nodes]
        # TODO: this could be better by progress += (starting_nodes_just_visited.length)
        progress.increment
      else
        progress.progress += just_visited_onodes.length
      end
    end

    if path.length_in_bp < @assembly_options[:min_contig_size]
      log.debug "Path length (#{path.length_in_bp}) less than min_contig_size (#{@assembly_options[:min_contig_size] }), not recording it" if log.debug?
      next
    end
    log.debug "Found a seemingly legitimate path #{path.to_shorthand}" if log.debug?
    if block_given?
      yield path
    else
      paths.push path
    end
  end
  progress.finish unless progress.nil?

  return paths
end
|
122
|
+
|
123
|
+
# Is the settable form of the final node of path a member of seen_nodes?
def seen_last_in_path?(path, seen_nodes)
  final_onode = path[-1]
  seen_nodes.include?(final_onode.to_settable)
end
|
126
|
+
|
127
|
+
# Return the nodes from which assembly should be attempted. When neither the
# :min_coverage_of_start_nodes nor the :min_length_of_start_nodes assembly
# option is set, this is simply graph.nodes. Otherwise it is an Array of the
# nodes whose coverage and length_alone meet whichever of the two minimums
# are set.
def gather_starting_nodes
  unless @assembly_options[:min_coverage_of_start_nodes] or @assembly_options[:min_length_of_start_nodes]
    return graph.nodes
  end

  eligible_nodes = []
  graph.nodes.each do |node|
    next unless @assembly_options[:min_coverage_of_start_nodes].nil? or
      node.coverage >= @assembly_options[:min_coverage_of_start_nodes]
    next unless @assembly_options[:min_length_of_start_nodes].nil? or
      node.length_alone >= @assembly_options[:min_length_of_start_nodes]

    eligible_nodes.push node
  end
  eligible_nodes
end
|
144
|
+
|
145
|
+
# Create a progress bar totalling num_nodes that writes to the
# :progressbar_io assembly option, or return nil when that option is not set
def setup_progressbar(num_nodes)
  return nil unless @assembly_options[:progressbar_io]

  ProgressBar.create(
    :title => "Assembly",
    :format => '%a %bᗧ%i %p%% %E %t',
    :progress_mark => ' ',
    :remainder_mark => '・',
    :total => num_nodes,
    :output => @assembly_options[:progressbar_io]
  )
end
|
159
|
+
|
160
|
+
# Given a node, return a path that does not include any short tips, or nil if none is
# connected to this node. The returned path is suitable for exploring
# forwards from: it is found by assembling away from the node (backwards
# first, then forwards if that fails), trimming previously seen nodes from
# the far end, reversing the result and peeling nodes off the end until the
# last node is no longer a short tip.
def find_beginning_trail_from_node(node, previously_seen_nodes)
  onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
  onode.node = node
  onode.first_side = Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST #go backwards first, because the path will later be reversed
  dummy_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  dummy_trail.trail = [onode]

  find_node_from_non_short_tip = lambda do |seed_trail|
    # go all the way forwards
    path, _visited_nodes = assemble_from(seed_trail)

    # Remove already seen nodes from the end of the trail, because
    # they are already included in other paths and this shows
    # up as duplicated contig stretches and this is not correct
    log.debug "Before removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, previously_seen_nodes)
    log.debug "After removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?

    # reverse the path
    path.reverse!
    log.debug "Before pruning back, trail is #{path.to_shorthand}" if log.debug?

    # peel back until we aren't in a short tip (these lost nodes might be
    # re-added later on). If only a short tip remains, there is no result.
    result = path
    while is_short_tip?(path[-1])[0]
      if path.length == 1
        result = nil
        break
      end
      path.delete_at(path.length-1)
      log.debug "After pruning back, trail is now #{path.to_shorthand}" if log.debug?
    end
    result
  end

  log.debug "Finding nearest find_connected_node_on_a_path #{node.node_id}" if log.debug?
  unless is_short_tip?(onode)[0]
    log.debug "fwd direction not a short tip, going with that" if log.debug?
    path = find_node_from_non_short_tip.call(dummy_trail)
    return path unless path.nil?
  end

  log.debug "rev direction is short tip, now testing reverse" if log.debug?
  # onode is the same object held in dummy_trail, so the trail is reversed too
  onode.reverse!
  if is_short_tip?(onode)[0]
    log.debug "short tip in both directions, there is no good neighbour" if log.debug?
    #short tip in both directions, so not a real contig
    return nil
  end

  log.debug "reverse direction not a short tip, going with that" if log.debug?
  return find_node_from_non_short_tip.call(dummy_trail)
end
|
226
|
+
|
227
|
+
# Delete nodes from the end of path (in place) for as long as the last node
# is present in seen_nodes in either orientation. Returns the same path.
def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of size #{seen_nodes.length}" if log.debug?
  both_sides = [
    Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
    Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
  ]
  until path.trail.empty?
    tail_index = path.length-1
    tail = path[tail_index]

    previously_seen = both_sides.any? do |side|
      seen_nodes.include?([tail.node_id, side])
    end
    # Last node is not previously seen, chop no further.
    break unless previously_seen

    path.trail.delete_at(tail_index)
  end
  return path
end
|
243
|
+
|
244
|
+
# Extend a copy of initial_path as far as possible, considering all reads
# as single ended. Extension stops at a dead end, at a node already
# visited (circularisation), beyond the leash length, or at a fork that
# cannot be resolved.
#
# Forks are resolved by, in order: clipping short tips, recoherence (only
# if :recoherence_kmer is given) and then removal of neighbours with
# coverage over :max_coverage_at_fork (only if that option is given).
#
# Assembly options used (taken from @assembly_options):
# :max_tip_length: if a path is shorter than this in bp, then it will be clipped from the path
# :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer
# :sequences: the read sequences passed to the recoherence check
# :min_confirming_recoherence_kmer_reads: passed to the recoherence check
# :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
# :max_coverage_at_fork: after recoherence, discard fork neighbours with more coverage than this
#
# initial_path:: the trail to extend; it is copied, not modified
# visited_onodes:: collection of settable oriented nodes already visited; it
#                  is added to as assembly proceeds
#
# Returns two things: the extended path, and visited_onodes
def assemble_from(initial_path, visited_onodes=Set.new)
  options = @assembly_options

  recoherencer = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new

  path = initial_path.copy
  initial_path[0...-1].each do |onode| #Add all except the last node to already seen nodes list
    visited_onodes << onode.to_settable
  end

  # Reused scratch trail for testing each candidate extension by recoherence
  dummy_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  oneighbours = nil
  while true
    log.debug "Now assembling from #{path[-1].to_shorthand}" if log.debug?
    if visited_onodes.include?(path[-1].to_settable)
      log.debug "Found circularisation in path, going no further" if log.debug?
      break
    else
      visited_onodes << path[-1].to_settable
    end

    if options[:leash_length] and path.length_in_bp-@graph.hash_length > options[:leash_length]
      log.debug "Beyond leash length, going no further with assembly" if log.debug?
      break
    end

    oneighbours = path.neighbours_of_last_node(@graph)
    if oneighbours.length == 0
      log.debug "Found a dead end, last node is #{path[-1].to_shorthand}" if log.debug?
      break

    elsif oneighbours.length == 1
      to_add = oneighbours[0]
      log.debug "Only one way to go, so going there, to #{to_add.to_shorthand}" if log.debug?
      path.add_oriented_node to_add

    else
      # Reached a fork (or 3 or 4-fork), which way to go?

      # Remove neighbours that are short tips
      oneighbours, visiteds = remove_tips(oneighbours, @assembly_options[:max_tip_length])
      visiteds.each do |onode_settable|
        visited_onodes << onode_settable
      end

      if oneighbours.length == 0
        log.debug "Found a dead end at a fork, last node is #{path[-1].to_shorthand}" if log.debug?
        break
      elsif oneighbours.length == 1
        log.debug "Clipped short tip(s) off, and then there was only one way to go" if log.debug?
        path.add_oriented_node oneighbours[0]
      elsif options[:recoherence_kmer].nil?
        if log.debug?
          neighbours_string = oneighbours.collect do |oneigh|
            oneigh.to_shorthand
          end.join(' or ')
          log.debug "Came across what appears to be a legitimate fork to nodes #{neighbours_string} and no recoherence kmer given, so giving up"
        end
        break
      else
        # A recoherence kmer is available here, so keep only the
        # neighbours that are validated by the reads
        log.debug "Attempting to resolve fork by recoherence" if log.debug?
        oneighbours.select! do |oneigh|
          dummy_trail.trail = path.trail+[oneigh]
          recoherencer.validate_last_node_of_path_by_recoherence(
            dummy_trail,
            options[:recoherence_kmer],
            options[:sequences],
            options[:min_confirming_recoherence_kmer_reads]
          )
        end

        if oneighbours.length == 0
          log.debug "no neighbours passed recoherence, giving up" if log.debug?
          break
        elsif oneighbours.length == 1
          log.debug "After recoherence there's only one way to go, going there" if log.debug?
          path.add_oriented_node oneighbours[0]
        elsif options[:max_coverage_at_fork]
          oneighbours.select! do |oneigh|
            oneigh.node.coverage <= options[:max_coverage_at_fork]
          end
          log.debug "Found #{oneighbours.length} neighbours after removing nodes over max coverage" if log.debug?

          if oneighbours.length == 1
            log.debug "After removing too much coverage neighbours there's only one way to go, going there" if log.debug?
            path.add_oriented_node oneighbours[0]
          else
            log.debug "After removing max coverage nodes, #{oneighbours.length} neighbours found (#{oneighbours.collect{|o| o.to_shorthand}.join(",") }), giving up" if log.debug?
            break
          end
        else
          log.debug "Still forked after recoherence (to #{oneighbours.collect{|on| on.to_shorthand}.join(' & ') }), so seems to be a legitimate fork, giving up" if log.debug?
          break
        end
      end
    end
  end

  # The final node counts as visited regardless of why the loop ended
  visited_onodes << path[-1].to_settable

  return path, visited_onodes
end
|
354
|
+
|
355
|
+
# Given a list of possibilities for neighbours of a node, return the
# neighbour(s) that are not short tips, or the longest of the short tips
# if all are tips. Also return an enumerable of nodes visited from the cut off
# short tips
#
# oriented_neighbours:: the candidate oriented nodes
# tip_distance:: paths no longer than this (as judged by find_tip_distance)
#                are considered tips
#
# Returns two things: an Array of the neighbours kept, and the settable
# oriented nodes belonging to the tips that were discarded (an empty Array
# when there were no neighbours at all, otherwise a Set).
def remove_tips(oriented_neighbours, tip_distance)
  return [], [] if oriented_neighbours.empty?

  # Pair each neighbour with its [is_tip, distance, visited] triple
  neighbours_and_triples = oriented_neighbours.collect do |oneigh|
    [
      oneigh,
      find_tip_distance(oneigh, tip_distance)
    ]
  end
  non_tips, tips = neighbours_and_triples.partition{|nt| nt[1][0] == false}

  visiteds = Set.new
  # Record a discarded tip's first node and everything visited beyond it
  process_tip = lambda do |tip|
    visiteds << tip[0].to_settable
    tip[1][2].each {|v| visiteds << v}
  end

  if non_tips.length > 0
    tips.each do |tip|
      process_tip.call tip
    end
    return non_tips.collect{|t| t[0]}, visiteds
  else
    # no long distances here. Just go with the longest path. max_by is
    # required because the block extracts a key; Enumerable#max would treat
    # the block as a two-argument comparator.
    best_tip = tips.max_by{|nt| nt[1][1]}
    tips.each do |tip|
      unless tip == best_tip
        process_tip.call tip
      end
    end
    return [best_tip[0]], visiteds
  end
end
|
392
|
+
|
393
|
+
# Run find_tip_distance from the given oriented_node using the
# :max_tip_length assembly option, and return two things: whether the node
# is a short tip (false iff a path longer than max_tip_length starts at the
# node), and the visited oriented nodes reported by find_tip_distance.
def is_short_tip?(oriented_node)
  outcome = find_tip_distance(oriented_node, @assembly_options[:max_tip_length])
  return outcome[0], outcome[2]
end
|
401
|
+
|
402
|
+
# The workhorse function of is_short_tip?
#
# Explores onwards from oriented_node using a stack (depth first), summing
# length_alone of the nodes along each route. Returns three things:
# * false as soon as some route is longer than max_tip_length, otherwise true
# * the distance that exceeded max_tip_length, or else the maximum distance found
# * an empty Array if not a tip, otherwise the settable forms of the
#   neighbouring oriented nodes reached (the starting node is not included)
def find_tip_distance(oriented_node, max_tip_length)
  origin = MaxDistancedOrientedNode.new
  origin.onode = oriented_node
  origin.distance = oriented_node.node.length_alone

  pending = DS::Stack.new
  pending.push origin

  # settable oriented node => longest distance found to it so far
  longest_known = {}
  furthest = origin.distance

  while current = pending.pop
    reached = current.distance
    return false, reached, [] if reached > max_tip_length

    furthest = reached if reached > furthest

    current.onode.next_neighbours(@graph).each do |oneigh|
      settable = oneigh.to_settable
      neighbour_distance = reached + oneigh.node.length_alone
      # No need to revisit unless this route is longer than any seen before
      next if longest_known[settable] and longest_known[settable] >= neighbour_distance

      following = MaxDistancedOrientedNode.new
      following.onode = oneigh
      following.distance = neighbour_distance
      log.debug "The distance of #{following.onode.node_id} is at least #{neighbour_distance}" if log.debug?
      longest_known[settable] = neighbour_distance
      pending.push following
    end
  end

  log.debug "Found insufficient max tip length #{furthest} for #{oriented_node}" if log.debug?
  return true, furthest, longest_known.keys
end
|
437
|
+
|
438
|
+
# Pairs an oriented node with a distance, as pushed onto the stack by
# find_tip_distance
class MaxDistancedOrientedNode
  # The oriented node
  attr_accessor :onode
  # The distance recorded for that oriented node
  attr_accessor :distance
end
|
441
|
+
end
|