finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require 'yaml'
|
|
2
|
+
|
|
3
|
+
# Accumulates per-contig descriptions of assembly "metapaths" and serialises
# the whole collection as YAML.
#
# A metapath is walked with #each; every element is either an oriented node
# or a bubble (anything that is not an OrientedNode and responds to
# #each_path).
class Bio::FinishM::BadFormatWriter
  def initialize
    # One hash per contig, in the order the contigs were added.
    @to_yamlify = []
  end

  # Record one contig.
  #
  # name     - value stored under 'contig_name'.
  # metapath - object whose #each yields oriented nodes and/or bubbles.
  #
  # Note the 'graph' entry holds a YAML *string* (the element list is
  # serialised here), so it ends up as YAML embedded within the YAML
  # produced by #yaml.
  #
  # Returns the internal array of recorded contigs.
  def add_metapath(name, metapath)
    elements = []
    metapath.each do |element|
      elements <<
        if element.is_a?(Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode)
          regular_entry(element)
        else
          bubble_entry(element)
        end
    end

    @to_yamlify << { 'contig_name' => name, 'graph' => elements.to_yaml }
  end

  # YAML text for everything recorded so far.
  def yaml
    @to_yamlify.to_yaml
  end

  # Print the YAML text to output_io (anything responding to #print).
  def write(output_io)
    output_io.print(yaml)
  end

  private

  # Hash describing a single oriented node.
  def regular_entry(onode)
    {
      'type' => 'regular',
      'node' => onode.to_shorthand,
      'coverage' => onode.node.coverage
    }
  end

  # Hash describing a bubble: one nodes/coverage pair per path through it.
  def bubble_entry(bubble)
    path_entries = []
    bubble.each_path do |path|
      path_entries << { 'nodes' => path.to_shorthand, 'coverage' => path.coverage }
    end
    { 'type' => 'bubble', 'paths' => path_entries }
  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
require 'bio-samtools'
|
|
2
|
+
|
|
3
|
+
module Bio
  module AssemblyGraphAlgorithms
    # Intended to pick 'probe' reads out of a BAM file so that contig ends
    # can be located in a velvet graph. NOTE(review): both methods are
    # unfinished stubs - neither selects nor returns a read yet.
    class BamProbeReadSelector
      include Bio::FinishM::Logging

      # Given an indexed bam file of reads mapped onto contigs,
      # an array of one or more [contig_name, position, direction] entries
      # (i.e. places in the contigs to locate reads for), a kmer (the match
      # has to be at least one perfect kmer overlapping the position) and a
      # path to a CnyUnifiedSeq.names file, the intent is to return an Array
      # of read_IDs of reads that can be used to locate the contig ends in
      # the velvet graph.
      #
      # This assumes that velvet hasn't done anything to clean up the graph,
      # as cleaning might remove reads of interest.
      #
      # Not implemented: the body is empty, so this currently returns nil.
      def find_probes(indexed_bam_file, contig_names_positions_directions, kmer, path_to_cny_unified_seq_names_file)
        # need to check the sequence of the aligned read is the same as what
        # is in the cny_unified_seq_names_file
      end

      # Given a contig name and a side, together with a path to an indexed
      # bam file, the intent is to pick out a read that can be used to
      # 'locate' the contig end in the assembly, and return a
      # Bio::DB::Alignment object of it.
      #
      # Only the scaffolding exists: overlapping alignments are iterated
      # over but nothing is filtered or chosen, so the return value is
      # whatever Bio::DB::Sam#each_alignment returns.
      def find_probe_read_alignment_from_contig_end(indexed_bam_file, contig_name, direction, position, kmer)
        # Search for all reads that overlap the overhang base, and are in
        # the correct direction
        sam = Bio::DB::Sam.new(:bam => indexed_bam_file)

        # The probes must overlap the position, to one back from the contig
        # end; which neighbouring base is included depends on the direction.
        window_start, window_stop = direction ? [position-1, position] : [position, position+1]
        position_hash = {
          :chr => contig_name,
          :start => window_start,
          :stop => window_stop,
        }

        sam.each_alignment(position_hash) do |alignment|
          # Reject reads that do not have matching stretches of DNA that are
          # at least kmer length long as these will not be included in the
          # assembly.
          # If it passes, then return the alignment
        end

        # Return the 'best' read's name and sequence.
      end
    end
  end
end
|
|
@@ -0,0 +1,842 @@
|
|
|
1
|
+
require 'set'
|
|
2
|
+
require 'ds'
|
|
3
|
+
|
|
4
|
+
# Like DS::PriorityQueue, except that the caller defines how priority is
# decided by passing a block to the constructor.
class DS::AnyPriorityQueue < DS::PriorityQueue
  # Create a new priority queue. Internally a DS::BinaryHeap stores the
  # elements; the block given here is called with the keys (priorities) of
  # two stored pairs (parent first, then child) whenever the heap compares
  # them.
  def initialize
    @store = DS::BinaryHeap.new do |parent, child|
      yield parent.key, child.key
    end
  end

  # Yield the value of every stored pair, in the order given by the heap's
  # #to_a (not necessarily priority order).
  def each
    @store.to_a.each { |pair| yield pair.value }
  end
end
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Bio::AssemblyGraphAlgorithms::BubblyAssembler < Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
|
|
21
|
+
include Bio::FinishM::Logging
|
|
22
|
+
|
|
23
|
+
DEFAULT_MAX_BUBBLE_LENGTH = 500
|
|
24
|
+
DEFAULT_BUBBLE_NODE_COUNT_LIMIT = 20 #so, so very 'un-educated' guess
|
|
25
|
+
DEFAULT_BUBBLE_FORK_LIMIT = 20
|
|
26
|
+
DEFAULT_MAX_CYCLES = 1
|
|
27
|
+
|
|
28
|
+
# Set up the assembler, filling in bubble-related defaults for any option
# that is missing (or nil/false) before handing over to the superclass.
#
# graph:: the graph to assemble; passed through to the superclass
# assembly_options:: options Hash. The defaults are written into this same
#   hash object, so the caller's hash is modified in place.
def initialize(graph, assembly_options={})
  {
    :max_bubble_length => DEFAULT_MAX_BUBBLE_LENGTH,
    :bubble_node_count_limit => DEFAULT_BUBBLE_NODE_COUNT_LIMIT,
    :bubble_fork_limit => DEFAULT_BUBBLE_FORK_LIMIT,
    :max_cycles => DEFAULT_MAX_CYCLES,
  }.each do |option, default_value|
    assembly_options[option] ||= default_value
  end

  super graph, assembly_options
end
|
|
36
|
+
|
|
37
|
+
# Starting at a node within a graph, walk through the graph
# accepting forks, so long as the fork paths converge within some finite
# length in the graph (the leash length, measured in number of base pairs,
# taken from the :max_bubble_length assembly option).
#
# starting_path:: enumerable of oriented nodes (and possibly already
#   converged Bubble objects) that seeds the walk. Each element is copied
#   to the front of the result.
# visited_oriented_node_settables:: Set of node settables considered already
#   traversed. It is added to in place while walking.
#
# Returns a two element Array: the MetaPath built (regular oriented nodes
# interleaved with converged Bubble objects, with #fate recording why the
# walk stopped) and the visited set.
def assemble_from(starting_path, visited_oriented_node_settables=Set.new)
  leash_length = @assembly_options[:max_bubble_length]
  if log.info? and starting_path.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail)
    log.info "Assembling from: #{starting_path.to_shorthand}"
  end

  # Strip short tips from a list of neighbours, remembering what
  # remove_tips reports as visited so it is not explored again.
  filter_neighbours = lambda do |neighbours|
    legit_neighbours, visiteds = remove_tips(neighbours, @assembly_options[:max_tip_length])
    visiteds.each do |onode|
      log.debug "Adding #{onode} to list of visited nodes" if log.debug?
      visited_oriented_node_settables << onode
    end
    legit_neighbours
  end

  filterVisited = lambda do |oneigh|
    visited_oriented_node_settables.include? oneigh.to_settable
  end

  # set up basic dynamic programming problem
  baseProblem = lambda do |oneigh|
    new_problem = DynamicProgrammingProblem.new
    new_problem.distance = 0
    new_path = Bio::Velvet::Graph::OrientedNodeTrail.new
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new
    new_problem.visited_oriented_nodes << oneigh.to_settable
    new_problem
  end

  # extend dynamic programming problem
  extendedProblem = lambda do |problem, oneigh|
    new_problem = DynamicProgrammingProblem.new
    new_problem.distance = problem.distance + problem.path[-1].node.length_alone
    new_path = problem.path.copy
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new problem.ubiquitous_oriented_nodes
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new problem.visited_oriented_nodes
    new_problem.visited_oriented_nodes << oneigh.to_settable
    # Stepping onto a node the parent problem has already visited is a cycle
    new_problem.circular_path_detected = true if problem.visited_oriented_nodes.include? oneigh.to_settable
    new_problem
  end

  current_bubble = nil
  metapath = MetaPath.new
  starting_path.each do |oriented_node|
    log.debug "adding onode at the start: #{oriented_node.to_shorthand}" if log.debug?
    metapath << oriented_node
  end

  # Keep track of nodes visited in this trajectory already so circuits can be avoided
  starting_path.each do |e|
    if e.kind_of?(Bubble)
      e.oriented_nodes do |onode|
        visited_oriented_node_settables << onode.to_settable
      end
    else
      visited_oriented_node_settables << e.to_settable
    end
  end

  current_mode = :linear # :linear, :bubble, or :finished

  while current_mode != :finished
    if current_mode == :linear
      log.debug "Starting a non-bubble from #{metapath.to_shorthand}" if log.debug?
      while true
        oriented_neighbours = metapath.last_oriented_node.next_neighbours(@graph)
        log.debug "Found oriented neighbours #{oriented_neighbours.collect{|onode| onode.to_shorthand} }" if log.debug?

        legit_neighbours = nil
        # Cut off tips unless it is the only way
        if oriented_neighbours.length == 1
          legit_neighbours = oriented_neighbours
        else
          legit_neighbours = filter_neighbours.call(oriented_neighbours)
        end

        if legit_neighbours.empty?
          # This is just a straight out dead end, and we can go no further.
          log.debug "Dead end reached" if log.debug?
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          break
        elsif legit_neighbours.length == 1
          # Linear thing here, just keep moving forward
          neighbour = legit_neighbours[0]

          # Stop if a circuit is detected
          # Tim - Always stop on a circuit in linear mode. "We cannot get out." - Book of Mazarbul.
          if visited_oriented_node_settables.include?(neighbour.to_settable)
            log.debug "Detected circuit in linear mode by running into #{neighbour.to_settable}" if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
            break
          else
            visited_oriented_node_settables << neighbour.to_settable
            metapath << neighbour
          end

        else
          # Reached a fork in the graph here, the point of this algorithm, really.
          # Hand the configured cycle limit to the bubble so that path
          # enumeration honours the :max_cycles assembly option.
          current_bubble = Bubble.new metapath.last_oriented_node, :max_cycles => @assembly_options[:max_cycles]
          log.debug "Starting a bubble forking from metapath #{metapath.to_shorthand}" if log.debug?

          if legit_neighbours.all? &filterVisited
            log.debug "Detected fork in linear mode where all neighbours have been previously traversed. This is effectively a dead end." if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
            # Leave now: without this break the enqueue loop below would
            # switch the mode back to :bubble and the walk would carry on.
            break
          end

          legit_neighbours.each do |oneigh|
            new_problem = baseProblem.call oneigh
            log.debug "Adding problem to bubble: #{new_problem}" if log.debug?

            current_bubble.enqueue new_problem
            current_mode = :bubble
          end
          break
        end
      end


    elsif current_mode == :bubble
      # We are in a bubble. Go get some.
      log.debug "entering bubble mode" if log.debug?

      # next problem = queue.shift. while distance of next problem is not beyond the leash length
      while current_mode == :bubble
        problem = current_bubble.shift

        if problem.nil?
          # Getting here seems improbable if not impossible.
          # The current bubble doesn't converge and just has short tips at the end, don't add it to the metapath
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          log.debug "Reached a dead end, ignoring this path" if log.debug?
          break
        end

        log.debug "Dequeued #{problem.to_shorthand}" if log.debug?
        if !leash_length.nil? and problem.distance > leash_length
          # The current bubble doesn't converge, don't add it to the metapath
          metapath.fate = MetaPath::DIVERGES_FATE
          current_mode = :finished
          log.debug "Bubble is past the leash length of #{leash_length}, giving up" if log.debug?
          break
        elsif current_bubble.convergent_on?(problem)
          log.debug "Bubble #{current_bubble.to_shorthand} convergent on #{problem.to_shorthand}" if log.debug?
          current_bubble.converge_on problem
          # convergement!
          # Bubble ended in a convergent fashion

          metapath << current_bubble
          # Add the nodes in the bubble to the list of visited nodes
          current_bubble.oriented_nodes do |onode|
            visited_oriented_node_settables << onode.to_settable
          end

          current_bubble = nil
          current_mode = :linear
          break
        else
          # otherwise we must search on in the bubble
          # get all neighbours that are not short tips
          log.debug "Bubble not convergent on #{problem.to_shorthand}" if log.debug?

          neighbours = problem.path.neighbours_of_last_node(@graph)

          # If there is only 1 way to go, go there
          if neighbours.length == 1
            log.debug "Only one way to go from this node, going there" if log.debug?

            oneigh = neighbours[0]
            new_problem = extendedProblem.call problem, oneigh
            current_bubble.enqueue new_problem
            log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

            # check to make sure we aren't going overboard in the bubbly-ness
            if !@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit]
              log.debug "Too complex a bubble detected, giving up" if log.debug?
              metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
              current_mode = :finished
              break
            end
          else
            legit_neighbours = filter_neighbours.call(neighbours)

            if legit_neighbours.length == 0
              # this is a kind of 'long' tip, possibly unlikely to happen much.
              # Forget about it and progress to the next problem having effectively
              # removed it from the bubble
              log.debug "Found no neighbours to re-enqueue" if log.debug?
            else
              # Increment complexity counter if this is a real fork
              if legit_neighbours.length > 1
                current_bubble.num_legit_forks += 1
              end

              legit_neighbours.each do |oneigh|
                new_problem = extendedProblem.call problem, oneigh
                current_bubble.enqueue new_problem
                log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

                # check to make sure we aren't going overboard in the bubbly-ness
                if (!@assembly_options[:bubble_fork_limit].nil? and current_bubble.num_legit_forks > @assembly_options[:bubble_fork_limit]) or
                    (!@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit])
                  log.debug "Too complex a bubble detected, giving up" if log.debug?
                  metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
                  current_mode = :finished
                  # Only leaves the neighbour loop; the enclosing while
                  # loop stops because the mode is no longer :bubble.
                  break
                end
              end
            end
          end
        end
      end
    else
      raise "Programming error: Unexpected mode: #{current_mode}"
    end

    log.debug "Reached end of main loop in mode #{current_mode}" if log.debug?
  end

  return metapath, visited_oriented_node_settables
end
|
|
276
|
+
|
|
277
|
+
# Has the final element of the path been seen before?
#
# path:: sequence of oriented nodes and Bubble objects (indexable with [-1])
# seen_nodes:: collection of node settables, queried with #include?
#
# For a regular node this is a plain membership test on its settable. For
# a Bubble the check is delegated to remove_seen_nodes_from_end_of_path,
# and the answer is true when that call shortened the path. Note that in
# the Bubble case the path is therefore trimmed in place as a side effect.
def seen_last_in_path?(path, seen_nodes)
  last = path[-1]
  return seen_nodes.include?(last.to_settable) unless last.kind_of?(Bubble)

  # The helper trims and returns the very same path object, so the length
  # has to be captured beforehand - comparing afterwards would compare the
  # trimmed length with itself and could never be true.
  length_before_trimming = path.length
  remove_seen_nodes_from_end_of_path(path, seen_nodes).length < length_before_trimming
end
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# Chop previously seen elements off the end of a path, in place.
#
# Working backwards from the last element, an element is removed when its
# node (in either orientation) is in seen_nodes; a Bubble is removed when
# any of the oriented nodes it yields is. Trimming stops at the first
# element that has not been seen.
#
# path:: mutable sequence of oriented nodes and Bubble objects
# seen_nodes:: collection of [node_id, orientation] pairs
#
# Returns the same path object that was passed in.
def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of length #{seen_nodes.length}" if log.debug?

  # Orientation is ignored: a node counts as seen whichever way around it
  # was traversed.
  previously_seen = lambda do |onode|
    [
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
    ].any? { |orientation| seen_nodes.include?([onode.node_id, orientation]) }
  end

  until path.empty?
    tail_index = path.length-1
    tail = path[tail_index]

    if tail.kind_of?(Bubble)
      drop_tail = false
      tail.oriented_nodes do |onode|
        if previously_seen.call(onode)
          drop_tail = true
          break
        end
      end
    else
      drop_tail = previously_seen.call(tail)
    end

    # Last node is not previously seen, chop no further.
    break unless drop_tail

    path.delete_at tail_index
  end

  path
end
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# An assembled path through the graph: an ordered list whose elements are
# either regular oriented nodes or (converged) Bubble objects standing for
# a set of alternative paths.
class MetaPath
  DIVERGES_FATE = 'diverges'
  DEAD_END_FATE = 'dead end'
  CIRCUIT_FATE = 'circuit'
  NODE_COUNT_LIMIT_REACHED = 'too many nodes in bubble'
  #CIRCUIT_WITHIN_BUBBLE_FATE = 'circuit within bubble' #Tim - shouldn't end metapath

  # How does this metapath end? (one of the *_FATE / LIMIT constants above)
  attr_accessor :fate

  include Enumerable

  def initialize
    @internal_array = []
  end

  # Yield each element (oriented node or Bubble) in order. Without a block
  # an Enumerator is returned, as is conventional for Enumerable classes.
  def each
    return enum_for(:each) unless block_given?

    @internal_array.each do |e|
      yield e
    end
  end

  def [](index)
    @internal_array[index]
  end

  def delete_at(index)
    @internal_array.delete_at index
  end

  def empty?
    @internal_array.empty?
  end

  # The oriented node the path currently ends on. When the last element is
  # a Bubble this is the node that bubble converges on.
  def last_oriented_node
    e = @internal_array[-1]
    if e.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode)
      return e
    else
      # it is a bubble
      return e.converging_oriented_node
    end
  end

  # Append an oriented node or Bubble to the end of the path.
  def <<(oriented_node_or_bubble)
    @internal_array << oriented_node_or_bubble
  end
  alias_method :push, :<<

  def to_shorthand
    @internal_array.collect{|e| e.to_shorthand}.join(',')
  end

  # Reverse the path in place: the order of the elements is flipped and
  # each element is itself asked to reverse!. Returns nil.
  def reverse!
    # Do regular reversal
    @internal_array.reverse!

    # Reverse all the internal parts
    @internal_array.each do |e|
      e.reverse!
    end

    return nil
  end

  # Number of elements, where a whole Bubble counts as one.
  def length
    @internal_array.length
  end

  # Yield all oriented nodes anywhere in the regular or bubble
  # bits. Without a block an Enumerator is returned.
  def each_oriented_node
    return enum_for(:each_oriented_node) unless block_given?

    @internal_array.each do |e|
      if e.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        e.oriented_nodes.each do |onode|
          yield onode
        end
      else
        yield e
      end
    end
  end

  # Length in base pairs, using each Bubble's reference trail as the
  # representative length of that bubble.
  def length_in_bp
    reduce(0) do |sum, e|
      if e.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        sum + e.reference_trail.length_in_bp_within_path
      else
        sum + e.node.length_alone
      end
    end
  end

  # A single OrientedNodeTrail through the metapath, made by substituting
  # each Bubble with the nodes of its reference trail.
  def reference_trail
    trail = Bio::Velvet::Graph::OrientedNodeTrail.new

    trail.trail = collect do |e|
      if e.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        e.reference_trail.trail
      else
        e
      end
    end.flatten

    return trail
  end

  def sequence
    reference_trail.sequence
  end

  # Length-weighted average coverage over all elements. An empty metapath
  # gives 0.0.
  def coverage
    # One [coverage, length] pair per element
    weighted = collect do |onode_or_bubble|
      if onode_or_bubble.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        # Length isn't obvious, but let's go with reference path length just coz that's easy.
        # The coverage figure is whatever the bubble itself reports.
        [onode_or_bubble.coverage, onode_or_bubble.reference_trail.length_in_bp_within_path]
      else
        #regular node. So simple average coverage
        [onode_or_bubble.node.coverage, onode_or_bubble.node.length_alone]
      end
    end

    # Then a simple weighted average
    total_length = weighted.reduce(0) { |sum, (_cov, len)| sum + len }
    weighted.reduce(0.0) do |sum, (cov, len)|
      sum + (cov * len.to_f / total_length)
    end
  end
end
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
# Tim - use 'waiting train' algorithm (made up by me).
|
|
469
|
+
# Problems collect the nodes they visit, adding them to hashes of 'ubiquitous' and 'visited' nodes
|
|
470
|
+
# (metaphor: 'train' (problem) visiting 'stations' (nodes)).
|
|
471
|
+
# Each time a problem is dequeued, new problems are enqueued for all neighbours to the problem node
|
|
472
|
+
# (metaphor: 'trains' (problems) magically duplicate for each path to a new 'station' (node) (metaphor
|
|
473
|
+
# breaks a bit here)).
|
|
474
|
+
# At each step the algorithm dequeues a problem, prioritising problems by shortest distance of any path
|
|
475
|
+
# to the problem node, meaning if a problem is enqueued for a node that is already known, then that
|
|
476
|
+
# problem is prioritised (metaphor: when a train leaves a station (problem is dequeued) other 'trains' will
|
|
477
|
+
# wait in case it catches up, or otherwise reaches a more distant station).
|
|
478
|
+
# If a new problem is enqueued for a problem node that is currently enqueued, the new problem is added
|
|
479
|
+
# to known problems removed from queue, and when a problem is dequeued, its ubiquitous and visited nodes
|
|
480
|
+
# are set to the ubiquitous and visited nodes of all known problems for the node (metaphor: the carriages
|
|
481
|
+
# of all trains at a station are merged into one train).
|
|
482
|
+
# Cycles occur when a problem reaches a node that is in its visited nodes hash (metaphor: a station that
|
|
483
|
+
# one of the train carriages has previously visited).
|
|
484
|
+
# Queued cyclic problems are added to known problems and then dropped.
|
|
485
|
+
# Bubble is converged when all current problems have a ubiquitous node in common (metaphor: all carriages
|
|
486
|
+
# of all current trains have visited a station).
|
|
487
|
+
class Bubble
|
|
488
|
+
include Bio::FinishM::Logging
|
|
489
|
+
|
|
490
|
+
# The DynamicProgrammingProblem this bubble converges on
|
|
491
|
+
attr_reader :converging_oriented_node_settable, :is_reverse, :root
|
|
492
|
+
|
|
493
|
+
# how many legit forks have been explored
|
|
494
|
+
attr_accessor :num_legit_forks
|
|
495
|
+
|
|
496
|
+
# Start an empty, not yet converged bubble.
#
# bubble_root:: the oriented node the bubble forks from
# options:: Hash; :max_cycles overrides DEFAULT_MAX_CYCLES as the cycle
#   limit used when paths are enumerated
def initialize(bubble_root, options = {})
  @root = bubble_root
  @max_cycles = options[:max_cycles] || DEFAULT_MAX_CYCLES
  @num_legit_forks = 0

  # settable => Array of every problem ever enqueued for that node
  @known_problems = {}
  # settables of the problems currently sitting in the queue
  @current_problems = Set.new
  # problems waiting to be explored; the block tells the queue how to
  # order two priorities
  @queue = DS::AnyPriorityQueue.new { |a, b| a <= b }
end
|
|
504
|
+
|
|
505
|
+
# Return the next closest dynamic programming problem,
# removing it from the bubble's queue. Returns nil when the queue has
# nothing left.
#
# Before it is handed back, the problem's ubiquitous and visited node sets
# are replaced by the sets merged over all known problems for its node.
def shift
  next_problem = @queue.shift
  return next_problem if next_problem.nil?

  next_problem.ubiquitous_oriented_nodes = ubiquitous_oriented_nodes(next_problem)
  next_problem.visited_oriented_nodes = visited_oriented_nodes(next_problem)
  # no longer queued, so the node may be enqueued again later
  @current_problems.delete next_problem.to_settable
  next_problem
end
|
|
516
|
+
|
|
517
|
+
# All nodes visited on any known route to the given problem's node: the
# union of the visited sets of the problem itself and of every known
# problem recorded for the same node. A new Set is returned; no problem is
# modified.
#
# The union is taken over visited_oriented_nodes (not over the ubiquitous
# sets), so nodes that lie on only some of the routes through earlier
# forks are kept and can still be recognised as cycles further on.
def visited_oriented_nodes(prob)
  @known_problems[prob.to_settable].reduce(prob.visited_oriented_nodes) do |memo, problem|
    memo + problem.visited_oriented_nodes
  end
end
|
|
523
|
+
|
|
524
|
+
# Nodes found on every known route to the given problem's node: the
# problem's own ubiquitous set intersected with the ubiquitous set of each
# known problem recorded for the same node.
def ubiquitous_oriented_nodes(prob)
  shared = prob.ubiquitous_oriented_nodes
  @known_problems[prob.to_settable].each do |known|
    shared = shared & known.ubiquitous_oriented_nodes
  end
  shared
end
|
|
530
|
+
|
|
531
|
+
# The smallest distance among all known problems recorded for the given
# problem's node; used as the queueing priority.
def shortest_problem_distance(prob)
  distances = @known_problems[prob.to_settable].map { |known| known.distance }
  distances.min
end
|
|
535
|
+
|
|
536
|
+
# Register a problem with the bubble.
#
# The problem is always recorded among the known problems for its node.
# It is only placed on the queue when it is not circular and its node is
# not already queued; the priority is the shortest known distance to
# that node.
def enqueue(dynamic_programming_problem)
  node_key = dynamic_programming_problem.to_settable

  (@known_problems[node_key] ||= []).push dynamic_programming_problem

  # don't requeue current problem or circular problem
  return if dynamic_programming_problem.circular_path_detected == true
  return if @current_problems.include? node_key

  @queue.enqueue dynamic_programming_problem, shortest_problem_distance(dynamic_programming_problem)
  @current_problems << node_key
end
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
# Return true if the given problem converges the bubble, else false.
#
# The bubble converges when the problem's node is among the ubiquitous
# nodes of every problem still in the queue (trivially true when the queue
# is empty).
def convergent_on?(dynamic_programming_problem)
  candidate = dynamic_programming_problem.to_settable

  @queue.each do |queued_problem|
    next if ubiquitous_oriented_nodes(queued_problem).include?(candidate)
    # one queued problem has not passed through the candidate
    return false
  end

  true
end
|
|
560
|
+
|
|
561
|
+
# Finish off the bubble, assuming convergent_on? the given problem == true.
# Records the node converged on and drops the queueing state, after which
# nothing more can be enqueued or shifted.
def converge_on(dynamic_programming_problem)
  @converging_oriented_node_settable = dynamic_programming_problem.to_settable
  #free some memory
  @queue = @current_problems = nil
end
|
|
568
|
+
|
|
569
|
+
# Yield, or failing that return an Array of, the oriented nodes found
# in at least one path in this bubble. Raises unless the bubble has
# converged.
#
# Nodes are discovered by walking backwards from the converging node
# through the recorded problems. With a block each node is yielded
# (reversed when the bubble is flagged as reversed) and nil is returned;
# without one the nodes are returned as an Array, never reversed.
def oriented_nodes
  raise unless converged?

  collected = {}
  pending = DS::Stack.new
  converging_onode = @known_problems[@converging_oriented_node_settable][0].path[-1]
  pending.push converging_onode

  while (current = pending.pop)
    key = current.to_settable
    next if collected.key?(key)

    yield(@is_reverse ? current.reverse : current) if block_given?
    collected[key] = current

    # queue neighbours for paths that don't contain the converging onode
    @known_problems[key].each do |dpp|
      next if dpp.path.length < 2
      next if dpp.path[0...-1].include? converging_onode
      pending.push dpp.path[-2]
    end
  end

  block_given? ? nil : collected.values
end
|
|
602
|
+
|
|
603
|
+
# Number of distinct nodes for which at least one problem has been
# enqueued (problems are grouped per node settable).
def num_known_problems
  @known_problems.size
end
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
# Iterate over the paths through the bubble, yielding each as an
# OrientedNodeTrail. Raises unless the bubble has converged.
#
# options:: Hash; :max_cycles overrides the bubble's own cycle limit
#
# Paths are rebuilt backwards from the converging node using the recorded
# problems. Work items on the stack are pairs: the trail still to be walked
# back along, and the nodes already fixed at the tail of the path.
def each_path(options = {})
  raise unless converged?
  max_cycles = options[:max_cycles] || @max_cycles

  # Metric used to prioritise each_path: compare two problems by the
  # coverage of the node leading into their final node.
  comparator = lambda do |problem1, problem2|
    # Normally the comparison is on the 2nd last nodes. When exactly one
    # of the paths goes straight from the initial to the terminal node,
    # that path has no 2nd last node, so its final node is used instead.
    lead_of = lambda do |problem, other|
      if problem.path.length == 1 and other.path.length > 1
        problem.path[-1]
      else
        problem.path[-2]
      end
    end
    onode1 = lead_of.call(problem1, problem2)
    onode2 = lead_of.call(problem2, problem1)

    if onode1.node.coverage == onode2.node.coverage
      # tie: order by node id, reversed
      -(onode1.node.node_id <=> onode2.node.node_id)
    else
      onode1.node.coverage <=> onode2.node.coverage
    end
  end

  log.debug "Iterating through each path of bubble" if log.debug?

  # Tim - use stack and push paths with lowest coverage first
  stack = DS::Stack.new
  counter = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder::CycleCounter.new max_cycles
  initial_solution = @known_problems[@converging_oriented_node_settable][0]
  stack.push [initial_solution.path, []]
  converging_onode = converging_oriented_node

  while (path_parts = stack.pop)
    direct_node_trail, second_part = path_parts

    if direct_node_trail.trail.length == 0
      # Walked all the way back: second_part is a complete path.

      # check for cycles through bubble root
      if second_part.include? @root
        @circuitous ||= true
        next if max_cycles == 0 or max_cycles < counter.path_cycle_count([@root]+second_part)
      end

      yield_path = Bio::Velvet::Graph::OrientedNodeTrail.new
      yield_path.trail = second_part
      yield_path = yield_path.reverse if @is_reverse
      log.debug "Yielded #{yield_path.to_shorthand}" if log.debug?
      yield yield_path
    else
      # go down the path, looking for other paths
      head_onode = direct_node_trail.trail[-1]
      new_second_part = [head_onode]+second_part

      # ignore paths that cycle through the converged node
      next if second_part.length > 1 and head_onode == converging_oriented_node

      if second_part.include? head_onode
        @circuitous ||= true
        next if max_cycles == 0 or max_cycles < counter.path_cycle_count(new_second_part)
      end

      new_problems = @known_problems[head_onode.to_settable]

      # Only enqueue paths where the second-to-head onode is not already queued
      problem_leads = Set.new
      filtered_problems = new_problems.reject do |new_problem|
        next false if new_problem.path.length < 2

        lead_settable = new_problem.path[-2].to_settable
        next true if problem_leads.include? lead_settable

        problem_leads << lead_settable
        false
      end

      filtered_problems.sort(&comparator).each do |new_problem|
        # TODO: deal with circuits
        new_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
        new_trail.trail = new_problem.path[0...-1]
        stack.push [new_trail, new_second_part]
      end
    end
  end
end
|
|
721
|
+
|
|
722
|
+
# Gather every path yielded by each_path into an Array, preserving the
# order in which they are yielded, and return that Array.
def paths
collected = []
each_path { |trail| collected << trail }
collected
end
|
|
729
|
+
|
|
730
|
+
# True once a converging oriented node (stored in settable form) has been
# recorded on this object, false while that slot is still nil.
def converged?
return false if @converging_oriented_node_settable.nil?
true
end
|
|
733
|
+
|
|
734
|
+
# Return the OrientedNode that converges this bubble, behaviour
|
|
735
|
+
# undefined if bubble is not converged
|
|
736
|
+
def converging_oriented_node
# Look up the problems stored under the converging settable key, take the
# first of them, and return the last element of its path.
problems_at_convergence = @known_problems[@converging_oriented_node_settable]
first_problem = problems_at_convergence[0]
first_problem.path[-1]
end
|
|
739
|
+
|
|
740
|
+
# Render this object as a compact String of the form "{a|b|c}".
#
# When converged, the pieces are the shorthand of every path, sorted
# lexically. Otherwise the pieces are the shorthand of each problem in
# @queue, in the order @queue iterates them.
def to_shorthand
pieces = []
if converged?
pieces = paths.collect { |trail| trail.to_shorthand }.sort
else
@queue.each { |pending| pieces << pending.to_shorthand }
end
"{#{pieces.join('|')}}"
end
|
|
751
|
+
|
|
752
|
+
# Toggle the @is_reverse flag (an unset flag counts as false) and return
# the new value. each_path reverses the paths it yields when the flag is set.
def reverse!
currently_reversed = @is_reverse || false
@is_reverse = !currently_reversed
end
|
|
756
|
+
|
|
757
|
+
# This doesn't make sense unless this is a converged bubble and the index == -1
|
|
758
|
+
# because otherwise there are multiple answers
|
|
759
|
+
def [](index)
# Only the last position (-1) is supported. Fail with a message that names
# the rejected index instead of a bare "unhandled exception"; the error
# class stays RuntimeError, as before.
unless index == -1
raise "only index -1 is supported here (got #{index.inspect})"
end

# Build a fresh OrientedNode from the two components of the stored
# settable representation of the converging node.
settable = @converging_oriented_node_settable
Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(settable[0], settable[1])
end
|
|
766
|
+
|
|
767
|
+
# Return one trail that exemplifies the paths through this bubble.
|
|
768
|
+
# Current method of path selection is simply greedy, taking the highest coverage node
|
|
769
|
+
# at each fork (or failing that the node with the lower node_id).
|
|
770
|
+
def reference_trail(max_cycles = @max_cycles)
raise "reference_trail requires a converged bubble" unless converged?

converging_onode = converging_oriented_node
log.debug "Finding reference trail from node #{converging_onode.node.node_id}" if log.debug?

# Forward the requested cycle limit to each_path; previously the
# max_cycles argument was accepted but never used. A nil limit passes an
# empty options hash so each_path applies its own default.
# NOTE(review): each_path is defined outside this view; the options-hash
# form with :max_cycles mirrors the call made in circuitous? -- confirm
# that each_path falls back to @max_cycles when the key is absent.
path_options = max_cycles.nil? ? {} : {:max_cycles => max_cycles}

# The reference trail is simply the first path each_path yields.
each_path(path_options) do |path|
return path
end

# each_path yielded nothing
nil
end
|
|
785
|
+
|
|
786
|
+
# Does this (converged) bubble contain any circuits?
|
|
787
|
+
def circuitous?
raise unless converged?

# A previous call (or an earlier each_path run) already settled the answer.
return @circuitous unless @circuitous.nil?

# Walk the paths with max_cycles set to 0; each_path sets @circuitous when
# it meets a cycle, so stop iterating as soon as that happens.
each_path({:max_cycles => 0}) { break if @circuitous }

# Still unset after the walk: record and return false.
@circuitous ||= false
end
|
|
795
|
+
|
|
796
|
+
# Coverage of a bubble is the coverage of each node in the bubble
|
|
797
|
+
# each weighted by their length
|
|
798
|
+
def coverage
# Length-weighted mean: accumulate coverage * length over every oriented
# node yielded by oriented_nodes, then divide by the summed length.
weighted_total = 0.0
total_length = 0
oriented_nodes do |onode|
node = onode.node
span = node.length_alone
weighted_total += node.coverage * span
total_length += span
end
weighted_total / total_length
end
|
|
808
|
+
end
|
|
809
|
+
|
|
810
|
+
# One sub-problem tracked during path finding: a path plus bookkeeping
# attributes. Only path and ubiquitous_oriented_nodes are initialised here;
# the remaining attributes start out as nil.
class DynamicProgrammingProblem
  attr_accessor :path, :ubiquitous_oriented_nodes, :visited_oriented_nodes, :distance, :circular_path_detected

  def initialize
    @path = []
    @ubiquitous_oriented_nodes = Set.new
  end

  # Settable form of the last element of the path.
  def to_settable
    @path[-1].to_settable
  end

  # Debug rendering: the shorthand form prefixed with "DPP <object_id>: ".
  def to_s
    "DPP #{object_id}: #{@path.to_shorthand}/#{ubiquitous_shorthand}/#{distance}"
  end

  # Compact rendering: "<path shorthand>/<ubiquitous nodes>/<distance>".
  def to_shorthand
    "#{@path.to_shorthand}/#{ubiquitous_shorthand}/#{distance}"
  end

  private

  # Comma-joined rendering of the ubiquitous oriented nodes. Each entry is a
  # two-element settable: its first element followed by 's' when the second
  # element equals START_IS_FIRST, otherwise 'e'.
  def ubiquitous_shorthand
    labels = @ubiquitous_oriented_nodes.collect do |settabled|
      suffix = settabled[1] == Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST ? 's' : 'e'
      "#{settabled[0]}#{suffix}"
    end
    labels.join(',')
  end
end
|
|
836
|
+
|
|
837
|
+
# An Array that also mixes in Comparable, so instances gain <, <=, >, >=,
# between? and clamp on top of Array's own <=>.
class ComparableArray < Array
  include Comparable
end
|
|
840
|
+
|
|
841
|
+
# Signals that a circuitous path was detected. It descends directly from
# Exception, so a bare `rescue` (which handles StandardError) does not catch it.
class CircuitousPathDetected < Exception
end
|
|
842
|
+
end
|