finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
class Bio::AssemblyGraphAlgorithms::SingleCoherentWanderer
|
|
2
|
+
include Bio::FinishM::Logging
|
|
3
|
+
|
|
4
|
+
# Like AcyclicConnectionFinder#depth_first_search_with_leash, except that it uses
|
|
5
|
+
# single read recoherence. The algorithm used is a generalisation of Dijkstra's
|
|
6
|
+
# shortest path algorithm, where instead of keeping track of the minimum
|
|
7
|
+
# distance to each node, the algorithm keeps track of the distance to a
|
|
8
|
+
# set of nodes long enough to invoke a recoherence kmer.
|
|
9
|
+
#
|
|
10
|
+
# Options:
|
|
11
|
+
# :max_explore_nodes: maximum number of nodes to explore from each node. If max is reached, don't make any connections (default: no maximum)
|
|
12
|
+
def wander(finishm_graph, leash_length, recoherence_kmer, sequence_hash, options={})
|
|
13
|
+
to_return = {}
|
|
14
|
+
|
|
15
|
+
# Take the probes and make them all into finishing nodes
|
|
16
|
+
finishing_nodes = []
|
|
17
|
+
finishm_graph.probe_nodes.each_with_index do |probe_node, probe_node_index|
|
|
18
|
+
direction = finishm_graph.probe_node_directions[probe_node_index]
|
|
19
|
+
if direction == true
|
|
20
|
+
finishing_nodes.push [probe_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST]
|
|
21
|
+
else
|
|
22
|
+
finishing_nodes.push [probe_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Search from each probed node in the graph
|
|
27
|
+
# TODO: is there a better way to implement this by somehow searching with
|
|
28
|
+
# all probe nodes at once, rather than starting fresh with each probe?
|
|
29
|
+
finishm_graph.probe_nodes.each_with_index do |probe_node, probe_node_index|
|
|
30
|
+
|
|
31
|
+
# Don't explore from the last node, as no new connections are established
|
|
32
|
+
next if probe_node_index == finishm_graph.probe_nodes.length - 1
|
|
33
|
+
|
|
34
|
+
# Go all the way to the leash length,
|
|
35
|
+
# and then search to see if any of the other nodes have been come across
|
|
36
|
+
log.debug "Exploring from probe node \##{probe_node_index+1} (node #{probe_node.node_id}/#{finishm_graph.probe_node_directions[probe_node_index] })" if log.debug?
|
|
37
|
+
pqueue = DS::AnyPriorityQueue.new {|a,b| a < b}
|
|
38
|
+
initial = finishm_graph.initial_path_from_probe(probe_node_index)
|
|
39
|
+
if initial.nil?
|
|
40
|
+
log.warn "Unable to start searching from probe \##{probe_node_index+1}, because it was not found in the graph. Skipping."
|
|
41
|
+
next
|
|
42
|
+
end
|
|
43
|
+
initial_distanced = DistancedOrientedNodeSet.new
|
|
44
|
+
initial_distanced.oriented_trail = initial
|
|
45
|
+
initial_distanced.distance = 0
|
|
46
|
+
|
|
47
|
+
# The minimum distance found to get to the head nodes
|
|
48
|
+
minimum_head_nodes_distances = {}
|
|
49
|
+
# Which head node sets is each node connected to?
|
|
50
|
+
node_to_head_node_sets = {}
|
|
51
|
+
#for Logging
|
|
52
|
+
last_logged_node_count = 0
|
|
53
|
+
maxed_out = false
|
|
54
|
+
|
|
55
|
+
pqueue.enqueue initial_distanced, 0
|
|
56
|
+
# While there are more node sets in the queue
|
|
57
|
+
while distanced_head_nodes = pqueue.dequeue
|
|
58
|
+
log.debug "Dequeued #{distanced_head_nodes}" if log.debug?
|
|
59
|
+
if options[:max_explore_nodes] and node_to_head_node_sets.length > options[:max_explore_nodes]
|
|
60
|
+
log.warn "Hit maximum number of nodes (#{options[:max_explore_nodes] }) while exploring from probe \##{probe_node_index+1}"
|
|
61
|
+
maxed_out = true
|
|
62
|
+
break
|
|
63
|
+
end
|
|
64
|
+
if log.info? and node_to_head_node_sets.length % 1024 == 0 and node_to_head_node_sets.length > last_logged_node_count
|
|
65
|
+
if last_logged_node_count == 0
|
|
66
|
+
log.info "While exploring from probe \##{probe_node_index+1}.."
|
|
67
|
+
end
|
|
68
|
+
log.info "So far worked with #{node_to_head_node_sets.length} distinct nodes in the assembly graph, at min distance #{distanced_head_nodes.distance}"
|
|
69
|
+
last_logged_node_count = node_to_head_node_sets.length
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
settable = distanced_head_nodes.to_settable
|
|
73
|
+
if minimum_head_nodes_distances.key?(settable) and
|
|
74
|
+
distanced_head_nodes.distance >= minimum_head_nodes_distances[distanced_head_nodes.to_settable].distance
|
|
75
|
+
# This node has already been explored, and no shorter path has been found here. Go no further.
|
|
76
|
+
next
|
|
77
|
+
end
|
|
78
|
+
minimum_head_nodes_distances[settable] = distanced_head_nodes
|
|
79
|
+
last_settable = distanced_head_nodes.oriented_trail.last.to_settable
|
|
80
|
+
node_to_head_node_sets[last_settable] ||= Set.new
|
|
81
|
+
node_to_head_node_sets[last_settable] << distanced_head_nodes.to_settable
|
|
82
|
+
|
|
83
|
+
if distanced_head_nodes.distance <= leash_length
|
|
84
|
+
# Still within the leash. Push into the stack all the current node's neighbours in the graph
|
|
85
|
+
last = distanced_head_nodes.oriented_trail.last
|
|
86
|
+
neighbour_onodes = finishm_graph.graph.neighbours_of(last.node, last.first_side)
|
|
87
|
+
log.debug "Found #{neighbour_onodes.length} neighbours" if log.debug?
|
|
88
|
+
if neighbour_onodes.length > 1
|
|
89
|
+
# Fork detected. Apply recoherence, and only enqueue those that pass
|
|
90
|
+
log.debug "Multiple neighbours found"
|
|
91
|
+
neighbour_onodes.each do |neighbour|
|
|
92
|
+
candidate = distanced_head_nodes.add_oriented_node_and_copy(neighbour, recoherence_kmer)
|
|
93
|
+
log.debug "Testing recoherence in candidate #{candidate.oriented_trail.to_s}" if log.debug?
|
|
94
|
+
if candidate.last_node_recoherent?(recoherence_kmer, sequence_hash)
|
|
95
|
+
log.debug "Candidate survived recoherence: #{candidate.to_s}" if log.debug?
|
|
96
|
+
pqueue.enqueue candidate, candidate.distance
|
|
97
|
+
elsif log.debug?
|
|
98
|
+
log.debug "Candidate did not survive recoherence #{candidate.oriented_trail.to_s}"
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
else
|
|
102
|
+
# One or none neighbours found. Enqueue if there is one
|
|
103
|
+
neighbour_onodes.each do |neighbour|
|
|
104
|
+
candidate = distanced_head_nodes.add_oriented_node_and_copy(neighbour, recoherence_kmer)
|
|
105
|
+
pqueue.enqueue candidate, candidate.distance
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
else
|
|
109
|
+
# we are beyond the leash, go no further
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
if maxed_out
|
|
114
|
+
log.debug "Maxed out, exiting loop early" if log.debug?
|
|
115
|
+
next
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Now have a hash of minimum distances. Now need to go through those and determine
|
|
119
|
+
# which other nodes the current probe node is connected to
|
|
120
|
+
finishm_graph.probe_nodes.each_with_index do |node, i|
|
|
121
|
+
next if i < probe_node_index # only return the 'upper triangle' of the distance matrices
|
|
122
|
+
|
|
123
|
+
finish = finishing_nodes[i]
|
|
124
|
+
heads = node_to_head_node_sets[finish]
|
|
125
|
+
next if heads.nil? #no connection found
|
|
126
|
+
|
|
127
|
+
# There might be many head_sets that include the finishing node.
|
|
128
|
+
# Which one has the least distance?
|
|
129
|
+
overall_min_distanced_set = nil
|
|
130
|
+
heads.each do |head_set|
|
|
131
|
+
min_distanced_set = minimum_head_nodes_distances[head_set]
|
|
132
|
+
# If there is a new winner
|
|
133
|
+
if overall_min_distanced_set.nil? or
|
|
134
|
+
overall_min_distanced_set.distance > min_distanced_set.distance
|
|
135
|
+
|
|
136
|
+
if probes_on_single_node_ok?(finishm_graph, probe_node_index, i)
|
|
137
|
+
log.debug "Verified that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?
|
|
138
|
+
else
|
|
139
|
+
#TODO: Possibly ok if contigs to be scaffolded are all on the same node. Unlikely in practice due to short tips, but still theoretically possible
|
|
140
|
+
log.debug "Failed to verify that probe indices #{probe_node_index}/#{i} are not failing on a 1 node basis" if log.debug?
|
|
141
|
+
next
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
overall_min_distanced_set = min_distanced_set
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
next if overall_min_distanced_set.nil? #no connection found - the only connection was a fake one
|
|
148
|
+
|
|
149
|
+
min_distance = overall_min_distanced_set.distance
|
|
150
|
+
log.debug "Found a connection between probes #{probe_node_index+1} and #{i+1}, distance: #{min_distance}" if log.debug?
|
|
151
|
+
to_return[[probe_node_index, i]] = min_distance
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
return to_return
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# Check position and orientation when the start and finish probes are
# on the same velvet node. Returns true if the probes are on different
# nodes, or if they are arranged in one of the OK ways below:
# --> <--- OK
# <-- --> not ok (unless the node is circular)
# <-- <-- not ok
# --> --> not ok
#
# finishm_graph: graph object providing probe_nodes, probe_node_directions,
# probe_node_reads, velvet_oriented_node and graph
# start_node_index, end_node_index: indices of the two probes
def probes_on_single_node_ok?(finishm_graph, start_node_index, end_node_index)
  node1 = finishm_graph.probe_nodes[start_node_index]
  node2 = finishm_graph.probe_nodes[end_node_index]
  return true if node1.node_id != node2.node_id

  node1_direction = finishm_graph.probe_node_directions[start_node_index]
  node2_direction = finishm_graph.probe_node_directions[end_node_index]
  node1_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node1, finishm_graph.probe_node_reads[start_node_index])
  node2_offset = direction_independent_offset_of_noded_read_from_start_of_node(
    node2, finishm_graph.probe_node_reads[end_node_index])
  log.debug "Validating for 1 node problems #{start_node_index}/#{end_node_index} #{node1_direction}/#{node2_direction} offsets #{node1_offset}/#{node2_offset}" if log.debug?

  # true/false and probe1 left of probe2, immediately below, is the most intuitive.
  # but false/true and probe1 right of probe2 is also valid
  if node1_direction == true and node2_direction == false and
    node1_offset < node2_offset
    return true
  end
  if node1_direction == false and node2_direction == true and
    node1_offset > node2_offset
    return true
  end

  # Probes pointing away from each other are only OK when the node is
  # circular, i.e. the node is one of its own neighbours. The neighbours are
  # collected as node IDs, so they must be compared against a node ID.
  if node1_direction == true and node2_direction == false
    onode = finishm_graph.velvet_oriented_node(start_node_index)
    neighbour_ids = finishm_graph.graph.neighbours_of(onode.node, onode.first_side).collect{|n| n.node_id}
    return true if neighbour_ids.include?(node1.node_id)
  end

  return false
end
|
|
196
|
+
|
|
197
|
+
private
|
|
198
|
+
# Return the offset of a noded read measured from the start of the node,
# whichever way the read points. For a read with direction true this is
# its offset_from_start_of_node, for direction false it is that offset
# subtracted from the node's corresponding_contig_length. Raises if the
# direction is neither true nor false.
def direction_independent_offset_of_noded_read_from_start_of_node(velvet_node, velvet_noded_read)
  direction = velvet_noded_read.direction
  return velvet_noded_read.offset_from_start_of_node if direction == true

  unless direction == false
    raise "programming error - velvet_noded_read does not have valid direction"
  end
  velvet_node.corresponding_contig_length - velvet_noded_read.offset_from_start_of_node
end
|
|
207
|
+
|
|
208
|
+
# A trail of oriented nodes (the 'head nodes') together with the distance of
# the last node in the trail from the origin of exploration
class DistancedOrientedNodeSet
  attr_accessor :oriented_trail, :distance

  # Return a flat array of [node_id, first_side, node_id, first_side, ..]
  # for the trail. Two distinct objects that hold the same trail give equal
  # arrays, so the array can be used in a Set or as a Hash key
  def to_settable
    flattened = []
    @oriented_trail.each do |onode|
      flattened.push onode.node_id, onode.first_side
    end
    flattened
  end

  # Return a new DistancedOrientedNodeSet that is this trail extended with
  # the given oriented_node. Nodes at the rear of the new trail that can no
  # longer contribute to recoherence are left out, and the distance is
  # increased by the length of the added node. This object is not modified
  def add_oriented_node_and_copy(oriented_node, recoherence_kmer)
    extended = @oriented_trail.trail + [oriented_node]

    # Walk back from the new head until the kept nodes span the recoherence
    # kmer. Nodes further back cannot contribute to the recoherence
    # calculation going forward
    keep_from = extended.length
    spanned = 0
    while keep_from > 0 and spanned < recoherence_kmer
      keep_from -= 1
      spanned += extended[keep_from].node.length_alone
    end

    copy = DistancedOrientedNodeSet.new
    copy.oriented_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
    copy.oriented_trail.trail = extended[keep_from..-1]
    copy.distance = @distance + oriented_node.node.length_alone
    copy
  end

  # Does the last node of the trail pass single read recoherence? Returns the
  # result of validating the trail with a shared
  # SingleCoherentPathsBetweenNodesFinder
  def last_node_recoherent?(recoherence_kmer, sequence_hash)
    @@single_recoherencer ||= Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new
    @@single_recoherencer.validate_last_node_of_path_by_recoherence(
      @oriented_trail, recoherence_kmer, sequence_hash
    )
  end

  # The trail followed by the distance in brackets
  def to_s
    "#{@oriented_trail}(#{@distance})"
  end
end
|
|
261
|
+
end
|
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
require 'ds'
|
|
2
|
+
require 'set'
|
|
3
|
+
require 'ruby-progressbar'
|
|
4
|
+
|
|
5
|
+
class Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
|
|
6
|
+
include Bio::FinishM::Logging
|
|
7
|
+
|
|
8
|
+
DEFAULT_MAX_TIP_LENGTH = 200
|
|
9
|
+
DEFAULT_MIN_CONTIG_SIZE = 500
|
|
10
|
+
DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS = 2
|
|
11
|
+
|
|
12
|
+
attr_accessor :graph
|
|
13
|
+
|
|
14
|
+
ASSEMBLY_OPTIONS = [
|
|
15
|
+
:max_tip_length,
|
|
16
|
+
:recoherence_kmer,
|
|
17
|
+
:min_confirming_recoherence_kmer_reads,
|
|
18
|
+
:sequences,
|
|
19
|
+
:leash_length,
|
|
20
|
+
:min_contig_size,
|
|
21
|
+
:max_coverage_at_fork,
|
|
22
|
+
]
|
|
23
|
+
attr_accessor :assembly_options
|
|
24
|
+
|
|
25
|
+
# Create a new assembler for the given velvet graph. Options missing from
# assembly_options are filled in with defaults, on the given hash itself.
#
# Assembly options:
# :max_tip_length: if a path is shorter than this in bp, then it will be clipped from the path (default DEFAULT_MAX_TIP_LENGTH)
# :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer (requires :sequences)
# :min_confirming_recoherence_kmer_reads: passed to the recoherence validation when resolving forks (default DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS)
# :sequences: the sequences of the actual reads, probably a Bio::Velvet::Underground::BinarySequenceStore object
# :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
# :min_coverage_of_start_nodes: only start exploring from nodes with this much coverage
# :min_contig_size: don't bother returning contigs shorter than this (default DEFAULT_MIN_CONTIG_SIZE)
# :progressbar_io: given an IO object e.g. $stdout, write progress information
def initialize(graph, assembly_options={})
  @graph = graph
  @assembly_options = assembly_options

  defaults = {
    :max_tip_length => DEFAULT_MAX_TIP_LENGTH,
    :min_contig_size => DEFAULT_MIN_CONTIG_SIZE,
    :min_confirming_recoherence_kmer_reads => DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS
  }
  defaults.each do |option, fallback|
    @assembly_options[option] ||= fallback
  end
end
|
|
42
|
+
|
|
43
|
+
# Assemble everything in the graph into OrientedNodeTrail objects.
# Yields each OrientedNodeTrail found if a block is given (and returns an
# empty array), otherwise returns an array of the found paths. Options for
# assembly are specified in assembly_options. Paths shorter than
# :min_contig_size are not reported.
def assemble
  paths = []

  # Gather a list of nodes to try starting from
  starting_nodes = gather_starting_nodes
  log.info "Found #{starting_nodes.length} nodes to attempt assembly from"

  # Settable oriented nodes already included in some path
  seen_nodes = Set.new
  progress = setup_progressbar starting_nodes.length

  # For each starting node, start the assembly process
  starting_nodes.each do |start_node|
    log.debug "Trying to assemble from #{start_node.node_id}" if log.debug?

    # If we've already covered this node, don't try it again
    if seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]) or
      seen_nodes.include?([start_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST])
      log.debug "Already seen this node, not inspecting further" if log.debug?
      next
    end

    # first attempt to go forward as far as possible, then reverse the path
    # and continue until cannot go farther
    reversed_path_forward = find_beginning_trail_from_node(start_node, seen_nodes)
    if reversed_path_forward.nil?
      log.debug "Could not find forward path from this node, giving up" if log.debug?
      next
    end
    # Have we already seen this path before?
    #TODO: add in recoherence logic here
    if seen_last_in_path?(reversed_path_forward, seen_nodes)
      log.debug "Already seen the last node of the reversed path forward: #{reversed_path_forward.trail[-1].to_shorthand}, giving up" if log.debug?
      next
    end
    # Assemble ahead again
    log.debug "reversed_path_forward: #{reversed_path_forward.to_shorthand}" if log.debug?
    path, just_visited_onodes = assemble_from(reversed_path_forward)

    # Remove nodes that have already been seen to prevent duplication
    log.debug "Before removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, seen_nodes)
    log.debug "After removing already seen nodes the second time, path was #{path.length} nodes long" if log.debug?

    # Record which nodes have already been visited, so they aren't visited again
    seen_nodes.merge just_visited_onodes
    unless progress.nil?
      if @assembly_options[:min_coverage_of_start_nodes]
        # TODO: this could be better by progress += (starting_nodes_just_visited.length)
        progress.increment
      else
        progress.progress += just_visited_onodes.length
      end
    end

    if path.length_in_bp < @assembly_options[:min_contig_size]
      log.debug "Path length (#{path.length_in_bp}) less than min_contig_size (#{@assembly_options[:min_contig_size] }), not recording it" if log.debug?
      next
    end
    log.debug "Found a seemingly legitimate path #{path.to_shorthand}" if log.debug?
    if block_given?
      yield path
    else
      paths.push path
    end
  end
  progress.finish unless progress.nil?

  return paths
end
|
|
122
|
+
|
|
123
|
+
# Is the settable form of the last oriented node of the path included in
# seen_nodes?
def seen_last_in_path?(path, seen_nodes)
  final_onode = path[-1]
  seen_nodes.include?(final_onode.to_settable)
end
|
|
126
|
+
|
|
127
|
+
# Return the nodes to attempt assembly from. When neither
# :min_coverage_of_start_nodes nor :min_length_of_start_nodes is set this is
# simply graph.nodes, otherwise it is an array of the nodes that meet
# each of the thresholds that was set
def gather_starting_nodes
  min_coverage = @assembly_options[:min_coverage_of_start_nodes]
  min_length = @assembly_options[:min_length_of_start_nodes]
  return graph.nodes unless min_coverage or min_length

  starting_nodes = []
  graph.nodes.each do |node|
    next unless min_coverage.nil? or node.coverage >= min_coverage
    next unless min_length.nil? or node.length_alone >= min_length
    starting_nodes.push node
  end
  return starting_nodes
end
|
|
144
|
+
|
|
145
|
+
# Create a ProgressBar with num_nodes as its total, writing to the
# :progressbar_io assembly option. Returns nil if that option is not set
def setup_progressbar(num_nodes)
  output = @assembly_options[:progressbar_io]
  return nil unless output

  ProgressBar.create(
    :title => "Assembly",
    :format => '%a %bᗧ%i %p%% %E %t',
    :progress_mark => ' ',
    :remainder_mark => '・',
    :total => num_nodes,
    :output => output
  )
end
|
|
159
|
+
|
|
160
|
+
# Given a node, return a path to begin assembling from, or nil if the node
# is a short tip in both directions or no such path could be found.
#
# The node is explored starting with END_IS_FIRST (backwards, because the
# path is later reversed). The assembled path has previously_seen_nodes
# removed from its end, is reversed, and then has short tips peeled off its
# end. If the first direction is a short tip or yields no path, the other
# direction is tried.
def find_beginning_trail_from_node(node, previously_seen_nodes)
  onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
  onode.node = node
  onode.first_side = Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST #go backwards first, because the path will later be reversed
  dummy_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  dummy_trail.trail = [onode]

  # Assemble as far as possible from the seed trail, then reverse and prune
  # the result. Gives nil if the pruning gets down to a single node that is
  # still a short tip
  explore_and_prune = lambda do |seed_trail|
    path, _visited_nodes = assemble_from(seed_trail)

    # Already seen nodes at the end of the trail are included in other
    # paths. Leaving them in would show up as duplicated contig stretches
    log.debug "Before removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?
    remove_seen_nodes_from_end_of_path(path, previously_seen_nodes)
    log.debug "After removing already seen nodes the first time, path was #{path.length} nodes long" if log.debug?

    path.reverse!
    log.debug "Before pruning back, trail is #{path.to_shorthand}" if log.debug?

    # Peel back until the end of the path is not in a short tip (the nodes
    # lost here might be re-added later on)
    pruned = path
    while is_short_tip?(path[-1])[0]
      if path.length == 1
        pruned = nil
        break
      end
      path.delete_at(path.length-1)
      log.debug "After pruning back, trail is now #{path.to_shorthand}" if log.debug?
    end
    pruned
  end

  log.debug "Finding nearest find_connected_node_on_a_path #{node.node_id}" if log.debug?
  unless is_short_tip?(onode)[0]
    log.debug "fwd direction not a short tip, going with that" if log.debug?
    found = explore_and_prune.call(dummy_trail)
    return found unless found.nil?
  end

  # Flip the node (dummy_trail holds the same object) and try the other way
  log.debug "rev direction is short tip, now testing reverse" if log.debug?
  onode.reverse!
  if is_short_tip?(onode)[0]
    #short tip in both directions, so not a real contig
    log.debug "short tip in both directions, there is no good neighbour" if log.debug?
    return nil
  end

  log.debug "reverse direction not a short tip, going with that" if log.debug?
  return explore_and_prune.call(dummy_trail)
end
|
|
226
|
+
|
|
227
|
+
# Delete nodes from the end of the path for as long as the last node is
# included in seen_nodes in either orientation. The path is modified in
# place, and is also returned
def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of size #{seen_nodes.length}" if log.debug?
  orientations = [
    Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
    Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
  ]

  until path.trail.empty?
    tail_index = path.length-1
    tail_id = path[tail_index].node_id

    # Stop chopping at the first node not previously seen
    break unless orientations.any? { |side| seen_nodes.include?([tail_id, side]) }
    path.trail.delete_at(tail_index)
  end
  return path
end
|
|
243
|
+
|
|
244
|
+
# Extend a copy of initial_path for as far as it can be unambiguously
# assembled, considering all reads as single ended. Returns the extended
# path and the set of settable oriented nodes visited along the way
# (visited_onodes, which is added to in place).
#
# Assembly stops at a circularisation, a dead end, once the path is beyond
# the leash length, or at a fork that cannot be resolved by clipping short
# tips, by recoherence or by the maximum fork coverage.
#
# Options are taken from @assembly_options:
# :max_tip_length: if a path is shorter than this in bp, then it will be clipped from the path
# :recoherence_kmer: attempt to separate paths by going back to the reads with this larger kmer
# :sequences, :min_confirming_recoherence_kmer_reads: passed to the recoherence validation
# :leash_length: don't continue assembly from nodes farther than this distance (in bp) away
# :max_coverage_at_fork: at a fork left unresolved by recoherence, disregard neighbours with coverage above this
def assemble_from(initial_path, visited_onodes=Set.new)
  options = @assembly_options

  recoherencer = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new

  path = initial_path.copy
  initial_path[0...-1].each do |onode| #Add all except the last node to already seen nodes list
    visited_onodes << onode.to_settable
  end

  # Reused as a scratch trail when testing recoherence of each neighbour
  dummy_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  while true
    log.debug "Now assembling from #{path[-1].to_shorthand}" if log.debug?
    if visited_onodes.include?(path[-1].to_settable)
      log.debug "Found circularisation in path, going no further" if log.debug?
      break
    else
      visited_onodes << path[-1].to_settable
    end

    if options[:leash_length] and path.length_in_bp-@graph.hash_length > options[:leash_length]
      log.debug "Beyond leash length, going no further with assembly" if log.debug?
      break
    end

    oneighbours = path.neighbours_of_last_node(@graph)
    if oneighbours.length == 0
      log.debug "Found a dead end, last node is #{path[-1].to_shorthand}" if log.debug?
      break

    elsif oneighbours.length == 1
      to_add = oneighbours[0]
      log.debug "Only one way to go, so going there, to #{to_add.to_shorthand}" if log.debug?
      path.add_oriented_node to_add

    else
      # Reached a fork (or 3 or 4-fork), which way to go?

      # Remove neighbours that are short tips
      oneighbours, visiteds = remove_tips(oneighbours, @assembly_options[:max_tip_length])
      visiteds.each do |onode_settable|
        visited_onodes << onode_settable
      end

      if oneighbours.length == 0
        log.debug "Found a dead end at a fork, last node is #{path[-1].to_shorthand}" if log.debug?
        break
      elsif oneighbours.length == 1
        log.debug "Clipped short tip(s) off, and then there was only one way to go" if log.debug?
        path.add_oriented_node oneighbours[0]
      elsif options[:recoherence_kmer].nil?
        if log.debug?
          neighbours_string = oneighbours.collect do |oneigh|
            oneigh.to_shorthand
          end.join(' or ')
          log.debug "Came across what appears to be a legitimate fork to nodes #{neighbours_string} and no recoherence kmer given, so giving up"
        end
        break
      else
        # A recoherence kmer was given, so only keep the neighbours that are
        # validated by recoherence
        log.debug "Attempting to resolve fork by recoherence" if log.debug?
        oneighbours.select! do |oneigh|
          dummy_trail.trail = path.trail+[oneigh]
          recoherencer.validate_last_node_of_path_by_recoherence(
            dummy_trail,
            options[:recoherence_kmer],
            options[:sequences],
            options[:min_confirming_recoherence_kmer_reads]
          )
        end

        if oneighbours.length == 0
          log.debug "no neighbours passed recoherence, giving up" if log.debug?
          break
        elsif oneighbours.length == 1
          log.debug "After recoherence there's only one way to go, going there"
          path.add_oriented_node oneighbours[0]
        elsif options[:max_coverage_at_fork]
          oneighbours.select! do |oneigh|
            oneigh.node.coverage <= options[:max_coverage_at_fork]
          end
          log.debug "Found #{oneighbours.length} neighbours after removing nodes over max coverage" if log.debug?

          if oneighbours.length == 1
            log.debug "After removing too much coverage neighbours there's only one way to go, going there"
            path.add_oriented_node oneighbours[0]
          else
            log.debug "After removing max coverage nodes, #{oneighbours.length} neighbours found (#{oneighbours.collect{|o| o.to_shorthand}.join(",") }), giving up" if log.debug?
            break
          end
        else
          log.debug "Still forked after recoherence (to #{oneighbours.collect{|on| on.to_shorthand}.join(' & ') }), so seems to be a legitimate fork, giving up" if log.debug?
          break
        end
      end
    end
  end

  visited_onodes << path[-1].to_settable

  return path, visited_onodes
end
|
|
354
|
+
|
|
355
|
+
# Given a list of possibilities for neighbours of a node, return the
# neighbour(s) that are not short tips, or only the longest of the short tips
# if all are tips. Also return an enumerable of the settable oriented nodes
# visited from the short tips that were cut off.
#
# oriented_neighbours: the candidate oriented neighbour nodes
# tip_distance: the maximum tip length passed to find_tip_distance
def remove_tips(oriented_neighbours, tip_distance)
  return [], [] if oriented_neighbours.empty?

  # Pair each neighbour with its [is_tip, distance, visited] triple
  neighbours_and_triples = oriented_neighbours.collect do |oneigh|
    [
      oneigh,
      find_tip_distance(oneigh, tip_distance)
    ]
  end
  non_tips, tips = neighbours_and_triples.partition{|nt| nt[1][0] == false}

  # Record the tip neighbour itself and everything visited beyond it
  visiteds = Set.new
  process_tip = lambda do |tip|
    visiteds << tip[0].to_settable
    tip[1][2].each {|v| visiteds << v}
  end

  if non_tips.length > 0
    tips.each do |tip|
      process_tip.call tip
    end
    return non_tips.collect{|t| t[0]}, visiteds
  else
    # no long distances here. Just go with the longest path. max_by ranks
    # the tips by their distance, whereas max with a block would treat the
    # block as a two element comparator
    best_tip = tips.max_by{|nt| nt[1][1]}
    tips.each do |tip|
      unless tip == best_tip
        process_tip.call tip
      end
    end
    return [best_tip[0]], visiteds
  end
end
|
|
392
|
+
|
|
393
|
+
# Test whether the given oriented_node is a short tip, using the
# :max_tip_length assembly option as the maximum tip length. Returns two
# values taken from find_tip_distance: whether the node is a short tip
# (false if a path longer than the maximum starts there), and the settable
# oriented nodes visited during the search
def is_short_tip?(oriented_node)
  tip_result = find_tip_distance(oriented_node, @assembly_options[:max_tip_length])
  return tip_result[0], tip_result[2]
end
|
|
401
|
+
|
|
402
|
+
# The workhorse function of is_short_tip?. Searches depth first from the
# given oriented_node, accumulating node lengths (including that of the
# starting node) along each path.
#
# Returns three values:
# * false as soon as some path is found to be longer than max_tip_length,
#   otherwise true
# * the distance that exceeded max_tip_length, or else the maximum distance
#   found
# * an empty array if not a tip, otherwise the settable oriented nodes
#   reached from the starting node
def find_tip_distance(oriented_node, max_tip_length)
  origin = MaxDistancedOrientedNode.new
  origin.onode = oriented_node
  origin.distance = oriented_node.node.length_alone

  pending = DS::Stack.new
  pending.push origin

  # Settable oriented node => largest distance it has been reached at
  reached_at = {}
  max_dist = origin.distance

  while current = pending.pop
    return false, current.distance, [] if current.distance > max_tip_length

    max_dist = current.distance if current.distance > max_dist

    current.onode.next_neighbours(@graph).each do |oneigh|
      neighbour_distance = current.distance + oneigh.node.length_alone
      key = oneigh.to_settable
      known = reached_at[key]
      # No need to go again if already reached at least this far along
      next if known and known >= neighbour_distance

      follower = MaxDistancedOrientedNode.new
      follower.onode = oneigh
      follower.distance = neighbour_distance
      log.debug "The distance of #{follower.onode.node_id} is at least #{neighbour_distance}" if log.debug?
      reached_at[key] = neighbour_distance
      pending.push follower
    end
  end

  log.debug "Found insufficient max tip length #{max_dist} for #{oriented_node}" if log.debug?
  return true, max_dist, reached_at.keys
end
|
|
437
|
+
|
|
438
|
+
# An oriented node paired with a distance, as pushed onto the stack by
# find_tip_distance
class MaxDistancedOrientedNode
  attr_accessor :onode
  attr_accessor :distance
end
|
|
441
|
+
end
|