finishm 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
# Collects a description of each contig's metapath and serialises the
# collection as YAML.
#
# Note that the per-contig graph is itself converted to a YAML string before
# being stored, so the final document carries YAML text nested inside YAML.
class Bio::FinishM::BadFormatWriter
  # Begin with no contigs recorded.
  def initialize
    @to_yamlify = []
  end

  # Record the metapath of one contig.
  #
  # name:: value stored under the 'contig_name' key
  # metapath:: object whose #each yields either OrientedNode instances or
  #            bubbles (anything else, which must respond to #each_path)
  #
  # Returns the internal list of recorded contigs, as Array#<< does.
  def add_metapath(name, metapath)
    entries = []
    metapath.each do |element|
      is_onode = element.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode)
      entries << (is_onode ? regular_entry(element) : bubble_entry(element))
    end

    contig_record = {
      'contig_name' => name,
      'graph' => entries.to_yaml
    }
    @to_yamlify << contig_record
  end

  # YAML text covering every contig recorded so far.
  def yaml
    @to_yamlify.to_yaml
  end

  # Print the YAML text to output_io (anything responding to #print).
  def write(output_io)
    output_io.print(yaml)
  end

  private

  # Hash describing a single oriented node: its shorthand and node coverage.
  def regular_entry(onode)
    {
      'type' => 'regular',
      'node' => onode.to_shorthand,
      'coverage' => onode.node.coverage
    }
  end

  # Hash describing a bubble: one shorthand/coverage pair per path it yields.
  def bubble_entry(bubble)
    path_entries = []
    bubble.each_path do |path|
      path_entries << {
        'nodes' => path.to_shorthand,
        'coverage' => path.coverage
      }
    end

    {
      'type' => 'bubble',
      'paths' => path_entries
    }
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'bio-samtools'
|
2
|
+
|
3
|
+
module Bio
  module AssemblyGraphAlgorithms
    class BamProbeReadSelector
      include Bio::FinishM::Logging

      # Given an indexed bam file of reads mapped onto contigs,
      # an array of one or more [contig_name, position, direction] entries
      # (i.e. places in the contigs to locate reads for), a kmer (the match has
      # to be at least one perfect kmer overlapping the position) and a path to
      # a CnyUnifiedSeq.names file, return an Array of read_IDs of reads that
      # can be used to locate the contig ends in the velvet graph.
      #
      # This assumes that velvet hasn't done anything to clean up the graph, as
      # cleaning might remove reads of interest.
      #
      # NOTE: not implemented yet - the body is empty, so nil is returned.
      def find_probes(indexed_bam_file, contig_names_positions_directions, kmer, path_to_cny_unified_seq_names_file)
        # need to check the sequence of the aligned read is the same as what is in the cny_unified_seq_names_file
      end

      # Given a contig name and a side, together with a path to an indexed bam
      # file, pick out a read that can be used to 'locate' the contig end in the
      # assembly, and return a Bio::DB::Alignment object of it.
      #
      # NOTE: the selection itself is not implemented yet; currently the method
      # just iterates over the overlapping alignments with an empty block and
      # returns whatever each_alignment returns.
      def find_probe_read_alignment_from_contig_end(indexed_bam_file, contig_name, direction, position, kmer)
        # Search for all reads that overlap the overhang base, and are in the correct direction
        sam = Bio::DB::Sam.new(:bam => indexed_bam_file)

        # The probes must overlap the position, to one back from the contig end
        start, stop = direction ? [position-1, position] : [position, position+1]
        region = {:chr => contig_name, :start => start, :stop => stop}

        sam.each_alignment(region) do |alignment|
          # Reject reads that do not have matching stretches of DNA that are at least kmer length long
          # as these will not be included in the assembly.
          # If it passes, then return the alignment
        end

        # Return the 'best' read's name and sequence.
      end
    end
  end
end
|
@@ -0,0 +1,842 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'ds'
|
3
|
+
|
4
|
+
# Like DS::PriorityQueue except give the ability to define how priority is given
|
5
|
+
class DS::AnyPriorityQueue < DS::PriorityQueue
  # Create a new priority queue. Internally a DS::BinaryHeap stores the
  # elements; the block given to .new is called with the keys of a parent and
  # a child entry and its result is used as the heap's ordering relation.
  def initialize
    @store = DS::BinaryHeap.new do |parent, child|
      yield parent.key, child.key
    end
  end

  # Yield the value of each stored pair, in the order given by @store.to_a.
  def each
    @store.to_a.each { |pair| yield pair.value }
  end
end
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
class Bio::AssemblyGraphAlgorithms::BubblyAssembler < Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
|
21
|
+
include Bio::FinishM::Logging
|
22
|
+
|
23
|
+
DEFAULT_MAX_BUBBLE_LENGTH = 500
|
24
|
+
DEFAULT_BUBBLE_NODE_COUNT_LIMIT = 20 #so, so very 'un-educated' guess
|
25
|
+
DEFAULT_BUBBLE_FORK_LIMIT = 20
|
26
|
+
DEFAULT_MAX_CYCLES = 1
|
27
|
+
|
28
|
+
# Fill in any bubble-related assembly options that were not supplied (or were
# supplied as nil/false) with the class defaults, then defer to the superclass.
# The given options hash is modified in place.
def initialize(graph, assembly_options={})
  {
    :max_bubble_length => DEFAULT_MAX_BUBBLE_LENGTH,
    :bubble_node_count_limit => DEFAULT_BUBBLE_NODE_COUNT_LIMIT,
    :bubble_fork_limit => DEFAULT_BUBBLE_FORK_LIMIT,
    :max_cycles => DEFAULT_MAX_CYCLES,
  }.each do |option, default|
    assembly_options[option] ||= default
  end
  super graph, assembly_options
end
|
36
|
+
|
37
|
+
# Starting at a node within a graph, walk through the graph
|
38
|
+
# accepting forks, so long as the fork paths converge within some finite
|
39
|
+
# length in the graph (the leash length, measured in number of base pairs).
|
40
|
+
#
|
41
|
+
# Return an Array of Path arrays, a MetaPath, where each path array are the different paths
|
42
|
+
# that can be taken at each fork point
|
43
|
+
def assemble_from(starting_path, visited_oriented_node_settables=Set.new)
  # Returns two values: the assembled MetaPath (with its fate set) and the
  # (updated in place) set of visited oriented node settables.

  # Bubbles whose next problem lies further than this distance are given up on
  leash_length = @assembly_options[:max_bubble_length]
  if log.info? and starting_path.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail)
    log.info "Assembling from: #{starting_path.to_shorthand}"
  end

  # Remove tips from the given neighbours, adding everything remove_tips
  # reports as visited to the visited set. Returns the remaining neighbours.
  filter_neighbours = lambda do |neighbours|
    legit_neighbours, visiteds = remove_tips(neighbours, @assembly_options[:max_tip_length])
    visiteds.each do |onode|
      log.debug "Adding #{onode} to list of visited nodes" if log.debug?
      visited_oriented_node_settables << onode
    end
    legit_neighbours
  end

  # True if the given oriented neighbour has already been visited
  filterVisited = lambda do |oneigh|
    visited_oriented_node_settables.include? oneigh.to_settable
  end

  # set up basic dynamic programming problem
  baseProblem = lambda do |oneigh|
    new_problem = DynamicProgrammingProblem.new
    new_problem.distance = 0
    new_path = Bio::Velvet::Graph::OrientedNodeTrail.new
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new
    new_problem.visited_oriented_nodes << oneigh.to_settable
    new_problem
  end

  # extend dynamic programming problem
  extendedProblem = lambda do |problem, oneigh|
    new_problem = DynamicProgrammingProblem.new
    # distance grows by the length of the node being left behind
    new_problem.distance = problem.distance + problem.path[-1].node.length_alone
    new_path = problem.path.copy
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new problem.ubiquitous_oriented_nodes
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new problem.visited_oriented_nodes
    new_problem.visited_oriented_nodes << oneigh.to_settable
    # the parent problem had already been through this node => cycle
    new_problem.circular_path_detected = true if problem.visited_oriented_nodes.include? oneigh.to_settable
    new_problem
  end

  current_bubble = nil
  metapath = MetaPath.new
  starting_path.each do |oriented_node|
    log.debug "adding onode at the start: #{oriented_node.to_shorthand}" if log.debug?
    metapath << oriented_node
  end

  # Keep track of nodes visited in this trajectory already so circuits can be avoided
  #visited_oriented_node_settables = Set.new
  starting_path.each do |e|
    if e.kind_of?(Bubble)
      e.oriented_nodes do |onode|
        visited_oriented_node_settables << onode.to_settable
      end
    else
      visited_oriented_node_settables << e.to_settable
    end
  end
  #log.debug "Starting with visited nodes #{visited_oriented_node_settables.to_a.join(',')}" if log.debug?

  current_mode = :linear # :linear, :bubble, or :finished

  while current_mode != :finished
    if current_mode == :linear
      log.debug "Starting a non-bubble from #{metapath.to_shorthand}" if log.debug?
      while true
        oriented_neighbours = metapath.last_oriented_node.next_neighbours(@graph)
        log.debug "Found oriented neighbours #{oriented_neighbours.collect{|onode| onode.to_shorthand} }" if log.debug?

        legit_neighbours = nil
        # Cut off tips unless it is the only way
        if oriented_neighbours.length == 1
          legit_neighbours = oriented_neighbours
        else
          legit_neighbours = filter_neighbours.call(oriented_neighbours)
        end

        if legit_neighbours.empty?
          # This is just a straight out dead end, and we can go no further.
          log.debug "Dead end reached" if log.debug?
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          break
        elsif legit_neighbours.length == 1
          # Linear thing here, just keep moving forward
          neighbour = legit_neighbours[0]

          # Stop if a circuit is detected
          # Tim - Always stop on a circuit in linear mode. "We cannot get out." - Book of Mazarbul.
          if visited_oriented_node_settables.include?(neighbour.to_settable)
            log.debug "Detected circuit in linear mode by running into #{neighbour.to_settable}" if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
            break
          else
            visited_oriented_node_settables << neighbour.to_settable
            metapath << neighbour
          end

        else
          # Reached a fork in the graph here, the point of this algorithm, really.
          current_bubble = Bubble.new metapath.last_oriented_node
          log.debug "Starting a bubble forking from metapath #{metapath.to_shorthand}" if log.debug?

          if legit_neighbours.all? &filterVisited
            log.debug "Detected fork in linear mode where all neighbours have been previously traversed. This is effectively a dead end." if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
            # Leave the linear loop now. Without this break the enqueue loop
            # below would set the mode back to :bubble and the already
            # traversed neighbours would be explored regardless.
            break
          end

          legit_neighbours.each do |oneigh|
            new_problem = baseProblem.call oneigh
            log.debug "Adding problem to bubble: #{new_problem}" if log.debug?

            current_bubble.enqueue new_problem
            current_mode = :bubble
          end
          break
        end
      end


    elsif current_mode == :bubble
      # We are in a bubble. Go get some.
      log.debug "entering bubble mode" if log.debug?

      # next problem = queue.shift. while distance of next problem is not beyond the leash length
      while current_mode == :bubble
        problem = current_bubble.shift

        if problem.nil?
          # Getting here seems improbable if not impossible.
          # The current bubble doesn't converge and just has short tips at the end, don't add it to the metapath
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          log.debug "Reached a dead end, ignoring this path" if log.debug?
          break
        end

        log.debug "Dequeued #{problem.to_shorthand}" if log.debug?
        if !leash_length.nil? and problem.distance > leash_length
          # The current bubble doesn't converge, don't add it to the metapath
          metapath.fate = MetaPath::DIVERGES_FATE
          current_mode = :finished
          log.debug "Bubble is past the leash length of #{leash_length}, giving up" if log.debug?
          break
        elsif current_bubble.convergent_on?(problem)
          log.debug "Bubble #{current_bubble.to_shorthand} convergent on #{problem.to_shorthand}" if log.debug?
          current_bubble.converge_on problem
          # convergement!
          # Bubble ended in a convergent fashion

          metapath << current_bubble
          # Add the nodes in the bubble to the list of visited nodes
          current_bubble.oriented_nodes do |onode|
            visited_oriented_node_settables << onode.to_settable
          end

          current_bubble = nil
          current_mode = :linear
          break
        else
          # otherwise we must search on in the bubble
          # get all neighbours that are not short tips
          log.debug "Bubble not convergent on #{problem.to_shorthand}" if log.debug?

          neighbours = problem.path.neighbours_of_last_node(@graph)

          # If there is only 1 way to go, go there
          if neighbours.length == 1
            log.debug "Only one way to go from this node, going there" if log.debug?

            oneigh = neighbours[0]
            new_problem = extendedProblem.call problem, oneigh
            current_bubble.enqueue new_problem
            log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

            # check to make sure we aren't going overboard in the bubbly-ness
            if !@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit]
              log.debug "Too complex a bubble detected, giving up" if log.debug?
              metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
              current_mode = :finished
              break
            end
          else
            legit_neighbours = filter_neighbours.call(neighbours)

            if legit_neighbours.length == 0
              # this is a kind of 'long' tip, possibly unlikely to happen much.
              # Forget about it and progress to the next problem having effectively
              # removed it from the bubble
              log.debug "Found no neighbours to re-enqueue" if log.debug?
            else
              # Increment complexity counter if this is a real fork
              if legit_neighbours.length > 1
                current_bubble.num_legit_forks += 1
              end

              legit_neighbours.each do |oneigh|
                new_problem = extendedProblem.call problem, oneigh
                current_bubble.enqueue new_problem
                log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

                # check to make sure we aren't going overboard in the bubbly-ness
                if (!@assembly_options[:bubble_fork_limit].nil? and current_bubble.num_legit_forks > @assembly_options[:bubble_fork_limit]) or
                  (!@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit])
                  log.debug "Too complex a bubble detected, giving up" if log.debug?
                  metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
                  current_mode = :finished
                  # only leaves the neighbour loop; the enclosing while ends
                  # because the mode is no longer :bubble
                  break
                end
              end
            end
          end
        end
      end
    else
      raise "Programming error: Unexpected mode: #{current_mode}"
    end

    log.debug "Reached end of main loop in mode #{current_mode}" if log.debug?
  end

  return metapath, visited_oriented_node_settables
end
|
276
|
+
|
277
|
+
# Return true if the last element of the path has been seen before, without
# modifying the path.
#
# * A regular oriented node counts as seen if its settable is in seen_nodes.
# * A Bubble counts as seen if any of its oriented nodes is in seen_nodes in
#   either orientation (the same criterion that
#   remove_seen_nodes_from_end_of_path uses to chop a trailing bubble).
#
# The previous implementation called remove_seen_nodes_from_end_of_path and
# compared the length of its return value with path.length. That method
# mutates and returns the very same object, so the comparison was always false
# and the caller's path was truncated as a side effect.
def seen_last_in_path?(path, seen_nodes)
  last = path[-1]
  return seen_nodes.include?(last.to_settable) unless last.kind_of?(Bubble)

  start_first = Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
  end_first = Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST

  seen = false
  last.oriented_nodes do |onode|
    if seen_nodes.include?([onode.node_id, start_first]) or seen_nodes.include?([onode.node_id, end_first])
      seen = true
      break
    end
  end
  seen
end
|
285
|
+
|
286
|
+
|
287
|
+
# Chop elements off the end of the given path (in place) for as long as the
# last element has been seen, then return the path.
#
# A regular node is 'seen' if its node_id is in seen_nodes in either
# orientation; a Bubble is 'seen' if that holds for any of its oriented nodes.
def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of length #{seen_nodes.length}" if log.debug?

  already_seen = lambda do |oriented_node|
    [
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST,
    ].any? { |first_side| seen_nodes.include?([oriented_node.node_id, first_side]) }
  end

  until path.empty?
    tail_index = path.length-1
    tail = path[tail_index]

    drop_tail =
      if tail.kind_of?(Bubble)
        hit = false
        tail.oriented_nodes do |onode|
          next unless already_seen.call(onode)
          hit = true
          break
        end
        hit
      else
        already_seen.call(tail)
      end

    # Last node is not previously seen, chop no further.
    break unless drop_tail

    path.delete_at tail_index
  end

  path
end
|
321
|
+
|
322
|
+
|
323
|
+
# An ordered list whose elements are either regular oriented nodes or
# (converged) Bubble objects, plus a record of how the path ended (its fate).
class MetaPath
  DIVERGES_FATE = 'diverges'
  DEAD_END_FATE = 'dead end'
  CIRCUIT_FATE = 'circuit'
  NODE_COUNT_LIMIT_REACHED = 'too many nodes in bubble'
  #CIRCUIT_WITHIN_BUBBLE_FATE = 'circuit within bubble' #Tim - shouldn't end metapath

  # How does this metapath end?
  attr_accessor :fate

  include Enumerable

  def initialize
    @internal_array = []
  end

  # Yield each element (oriented node or bubble) in order. Returns an
  # Enumerator when no block is given.
  def each
    return enum_for(:each) unless block_given?

    @internal_array.each do |e|
      yield e
    end
  end

  def [](index)
    @internal_array[index]
  end

  def delete_at(index)
    @internal_array.delete_at index
  end

  def empty?
    @internal_array.empty?
  end

  # The last oriented node of the path: the last element itself if it is a
  # regular oriented node, otherwise the node the last bubble converges on.
  def last_oriented_node
    e = @internal_array[-1]
    if e.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode)
      return e
    else
      # it is a bubble
      return e.converging_oriented_node
    end
  end

  def <<(oriented_node_or_bubble)
    @internal_array << oriented_node_or_bubble
  end
  alias_method :push, :<<

  def to_shorthand
    @internal_array.collect{|e| e.to_shorthand}.join(',')
  end

  # Reverse the path in place: both the order of the elements and each
  # element itself. Returns nil.
  def reverse!
    # Do regular reversal
    @internal_array.reverse!

    # Reverse all the internal parts
    @internal_array.each do |e|
      e.reverse!
    end

    return nil
  end

  def length
    @internal_array.length
  end

  # Yield all oriented nodes anywhere in the regular or bubble
  # bits. Returns an Enumerator when no block is given.
  def each_oriented_node
    return enum_for(:each_oriented_node) unless block_given?

    @internal_array.each do |e|
      if e.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        e.oriented_nodes.each do |onode|
          yield onode
        end
      else
        yield e
      end
    end
  end

  # Total length: node.length_alone for regular nodes, and the length of the
  # reference trail for bubbles.
  def length_in_bp
    sum = 0
    each do |e|
      if e.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        sum += e.reference_trail.length_in_bp_within_path
      else
        sum += e.node.length_alone
      end
    end
    return sum
  end

  # A single OrientedNodeTrail through the metapath, where each bubble is
  # replaced by the nodes of its reference trail.
  def reference_trail
    trail = Bio::Velvet::Graph::OrientedNodeTrail.new

    trail.trail = collect do |e|
      if e.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        e.reference_trail.trail
      else
        e
      end
    end.flatten

    return trail
  end

  def sequence
    reference_trail.sequence
  end

  # Length-weighted average coverage over all elements. Returns 0.0 for an
  # empty metapath.
  def coverage
    coverages = []
    lengths = []
    each do |onode_or_bubble|
      if onode_or_bubble.kind_of?(Bio::AssemblyGraphAlgorithms::BubblyAssembler::Bubble)
        # Length isn't obvious, but let's go with reference path length just coz that's easy
        lengths.push onode_or_bubble.reference_trail.length_in_bp_within_path

        # Coverage of a bubble is the coverage of each node in the bubble
        # each weighted by their length
        coverages.push onode_or_bubble.coverage
      else
        #regular node. So simple average coverage
        coverages.push onode_or_bubble.node.coverage
        lengths.push onode_or_bubble.node.length_alone
      end
    end

    # Then a simple weighted average
    total_length = lengths.reduce(:+)
    coverages.zip(lengths).reduce(0.0) do |sum, (cov, len)|
      sum + (cov * len.to_f / total_length)
    end
  end
end
|
466
|
+
|
467
|
+
|
468
|
+
# Tim - use 'waiting train' algorithm (made up by me).
|
469
|
+
# Problems collect the nodes they visit, adding them to hashes of 'ubiquitous' and 'visited' nodes
|
470
|
+
# (metaphor: 'train' (problem) visiting 'stations' (nodes)).
|
471
|
+
# Each time a problem is dequeued, new problems are enqueued for all neighbours to the problem node
|
472
|
+
# (metaphor: 'trains' (problems) magically duplicate for each path to a new 'station' (node) (metaphor
|
473
|
+
# breaks a bit here)).
|
474
|
+
# At each step the algorithm dequeues a problem, prioritising problems by shortest distance of any path
|
475
|
+
# to the problem node, meaning if a problem is enqueued for a node that is already known, then that
|
476
|
+
# problem is prioritised (metaphor: when a train leaves a station (problem is dequeued) other 'trains' will
|
477
|
+
# wait in case it catches up, or otherwise reaches a more distant station).
|
478
|
+
# If a new problem is enqueued for a problem node that is currently enqueued, the new problem is added
|
479
|
+
# to known problems removed from queue, and when a problem is dequeued, its ubiquitous and visited nodes
|
480
|
+
# are set to the ubiquitous and visited nodes of all known problems for the node (metaphor: the carriages
|
481
|
+
# of all trains at a station are merged into one train).
|
482
|
+
# Cycles occur when a problem reaches a node that is in its visited nodes hash (metaphor: a station that
|
483
|
+
# one of the train carriages has previously visited).
|
484
|
+
# Queued cyclic problems are added to known problems and then dropped.
|
485
|
+
# Bubble is converged when all current problems have a ubiquitous node in common (metaphor: all carriages
|
486
|
+
# of all current trains have visited a station).
|
487
|
+
class Bubble
|
488
|
+
include Bio::FinishM::Logging
|
489
|
+
|
490
|
+
# The DynamicProgrammingProblem this bubble converges on
|
491
|
+
attr_reader :converging_oriented_node_settable, :is_reverse, :root
|
492
|
+
|
493
|
+
# how many legit forks have been explored
|
494
|
+
attr_accessor :num_legit_forks
|
495
|
+
|
496
|
+
# bubble_root is kept as @root. The only option read here is :max_cycles,
# which falls back to DEFAULT_MAX_CYCLES.
def initialize(bubble_root, options = {})
  @root = bubble_root
  @max_cycles = options[:max_cycles] || DEFAULT_MAX_CYCLES
  @num_legit_forks = 0
  # settables of problems that are currently sitting in the queue
  @current_problems = Set.new
  # settable => Array of every problem enqueued for that settable
  @known_problems = {}
  @queue = DS::AnyPriorityQueue.new { |left, right| left <= right }
end
|
504
|
+
|
505
|
+
# Return the next closest dynamic programming problem,
|
506
|
+
# removing it from the bubble
|
507
|
+
def shift
  dequeued = @queue.shift
  # nothing left in the queue
  return dequeued if dequeued.nil?

  # Merge in what is known from every problem recorded for the same settable
  dequeued.ubiquitous_oriented_nodes = ubiquitous_oriented_nodes(dequeued)
  dequeued.visited_oriented_nodes = visited_oriented_nodes(dequeued)
  @current_problems.delete dequeued.to_settable
  dequeued
end
|
516
|
+
|
517
|
+
# Union of the given problem's ubiquitous_oriented_nodes with those of every
# known problem that shares its settable.
#
# NOTE(review): despite the method name, the union is built from the
# ubiquitous_oriented_nodes of each problem, not from their
# visited_oriented_nodes - confirm this is intended.
def visited_oriented_nodes(prob)
  #all visited nodes for relevant problems
  related = @known_problems[prob.to_settable]
  related.inject(prob.ubiquitous_oriented_nodes) do |union, known|
    union + known.ubiquitous_oriented_nodes
  end
end
|
523
|
+
|
524
|
+
# Intersection of the given problem's ubiquitous_oriented_nodes with those of
# every known problem that shares its settable.
def ubiquitous_oriented_nodes(prob)
  #only ubiquitous nodes from relevant problems
  related = @known_problems[prob.to_settable]
  related.inject(prob.ubiquitous_oriented_nodes) do |common, known|
    common & known.ubiquitous_oriented_nodes
  end
end
|
530
|
+
|
531
|
+
# Smallest distance among all known problems sharing the given problem's
# settable; used as the queue priority.
def shortest_problem_distance(prob)
  # prioritise by the shortest distance for current problem
  distances = @known_problems[prob.to_settable].map { |known| known.distance }
  distances.min
end
|
535
|
+
|
536
|
+
# Record the problem under its settable in @known_problems and, unless it is
# circular or a problem with the same settable is already queued, add it to
# the queue prioritised by the shortest known distance for that settable.
def enqueue(dynamic_programming_problem)
  key = dynamic_programming_problem.to_settable

  (@known_problems[key] ||= []).push dynamic_programming_problem

  # don't requeue current problem or circular problem
  return nil if dynamic_programming_problem.circular_path_detected == true
  return nil if @current_problems.include? key

  priority = shortest_problem_distance(dynamic_programming_problem)
  @queue.enqueue dynamic_programming_problem, priority
  @current_problems << key
end
|
549
|
+
|
550
|
+
|
551
|
+
# return true if the given problem converges the bubble, else false
|
552
|
+
def convergent_on?(dynamic_programming_problem)
  candidate = dynamic_programming_problem.to_settable

  # Convergent only if the candidate is ubiquitous for every queued problem
  @queue.each do |queued|
    shared = ubiquitous_oriented_nodes(queued)
    return false if !shared.include?(candidate)
  end
  true
end
|
560
|
+
|
561
|
+
# Finish off the bubble, assuming convergent_on? the given problem == true
|
562
|
+
def converge_on(dynamic_programming_problem)
  @converging_oriented_node_settable = dynamic_programming_problem.to_settable
  # free some memory: the queue bookkeeping is dropped once converged
  @queue = @current_problems = nil
end
|
568
|
+
|
569
|
+
# yield or failing that return an Array of the list of oriented_nodes found
|
570
|
+
# in at least one path in this (presumed converged) bubble
|
571
|
+
# Raises unless the bubble is converged. Walks backwards from the converging
# node through the known problems. With a block each node found is yielded
# (reversed if the bubble has been reversed) and nil is returned; without a
# block an Array of the nodes is returned.
def oriented_nodes
  raise unless converged?
  yielding = block_given?
  found = {}
  pending = DS::Stack.new
  terminal = @known_problems[@converging_oriented_node_settable][0].path[-1]
  pending.push terminal

  while (current = pending.pop)
    key = current.to_settable
    next if found.key?(key)

    yield(@is_reverse ? current.reverse : current) if yielding

    found[key] = current

    # queue neighbours for paths that don't contain the converging onode
    @known_problems[key].each do |dpp|
      next if dpp.path.length < 2
      next if dpp.path[0...-1].include? terminal
      pending.push dpp.path[-2]
    end
  end

  yielding ? nil : found.values
end
|
602
|
+
|
603
|
+
# Number of distinct settables for which at least one problem has been recorded.
def num_known_problems
  @known_problems.size
end
|
606
|
+
|
607
|
+
|
608
|
+
# Iterate over the paths returning each as an OrientedNodeTrail.
|
609
|
+
# Assumes the path is convergent.
|
610
|
+
# Options:
# :max_cycles: maximum cycle count allowed for a yielded path (defaults to
#   the value the bubble was created with)
#
# Raises unless the bubble is converged. Returns an Enumerator when no block
# is given.
def each_path(options = {})
  raise unless converged?
  return enum_for(:each_path, options) unless block_given?
  max_cycles = options[:max_cycles] || @max_cycles

  # Metric used to prioritise each_path
  comparator = lambda do |problem1, problem2|
    onode1 = nil
    onode2 = nil
    if problem1.path.length == 1 and problem2.path.length > 1
      # Here the comparison cannot be made on 2nd last node coverages
      # since one of the paths goes straight from the initial to the terminal
      # node. Choose instead based on if the second last node has higher or lower
      # coverage than the final node
      onode1 = problem1.path[-1]
      onode2 = problem2.path[-2]
    elsif problem2.path.length == 1 and problem1.path.length > 1
      onode1 = problem1.path[-2]
      onode2 = problem2.path[-1]
    else
      onode1 = problem1.path[-2]
      onode2 = problem2.path[-2]
    end
    #log.debug "Comparing nodes #{onode1.node.node_id} and #{onode2.node.node_id}" if log.debug?

    if onode1.node.coverage == onode2.node.coverage
      # tie-break on node_id, reversed so the lower id ends up later in the sort
      -(onode1.node.node_id <=> onode2.node.node_id)
    else
      onode1.node.coverage <=> onode2.node.coverage
    end
  end

  log.debug "Iterating through each path of bubble" if log.debug?

  # Tim - use stack and push paths with lowest coverage first
  # Each stack entry is a pair: [trail still to be walked backwards,
  # Array of oriented nodes already fixed at the end of the path]
  stack = DS::Stack.new
  counter = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder::CycleCounter.new max_cycles
  initial_solution = @known_problems[@converging_oriented_node_settable][0]
  stack.push [initial_solution.path, []]
  # Looked up once here; nothing in the loop below changes @known_problems
  converging_onode = converging_oriented_node
  #log.debug "Pushed to stack #{initial_solution.path.to_shorthand}" if log.debug?


  while path_parts = stack.pop
    direct_node_trail = path_parts[0]
    second_part = path_parts[1]
    #log.debug "Popped #{direct_node_trail.to_shorthand} and [#{second_part.collect{|o| o.to_shorthand}.join(',') }]" if log.debug?


    if direct_node_trail.trail.length == 0

      # check for cycles through bubble root
      if second_part.include? @root
        #log.debug "Found cycle through bubble root." if log.debug?
        @circuitous ||= true
        if max_cycles == 0 or max_cycles < counter.path_cycle_count([@root]+second_part)
          #log.debug "Not finishing cyclic path with too many cycles." if log.debug?
          next
        end
      end

      yield_path = Bio::Velvet::Graph::OrientedNodeTrail.new
      yield_path.trail = second_part
      if @is_reverse
        yield_path = yield_path.reverse
      end
      log.debug "Yielded #{yield_path.to_shorthand}" if log.debug?
      yield yield_path
    else
      # go down the path, looking for other paths
      head_onode = direct_node_trail.trail[-1]
      new_second_part = [head_onode]+second_part
      if second_part.length > 1 and head_onode == converging_onode
        #log.debug "Ignoring path with cycle through converged node." if log.debug?
        next
      end
      if second_part.include? head_onode
        #log.debug "Cycle at node #{head_onode.node_id} in path #{second_part.collect{|onode| onode.node.node_id}.join(',')}." if log.debug?
        @circuitous ||= true
        if max_cycles == 0 or max_cycles < counter.path_cycle_count(new_second_part)
          #log.debug "Not finishing cyclic path with too many cycles." if log.debug?
          next
        end
      end

      new_problems = @known_problems[head_onode.to_settable]
      #log.debug "Found new problems: #{new_problems.collect{|prob| prob.to_shorthand}.join(' ') }" if log.debug?

      problem_leads = Set.new
      filtered_problems = new_problems.reject do |new_problem|
        # Only enqueue paths where the second-to-head onode is not already queued
        unless new_problem.path.length < 2
          lead_settable = new_problem.path[-2].to_settable
          if problem_leads.include? lead_settable
            #log.debug "Ignoring duplicate neighbour problem #{new_problem.to_shorthand}" if log.debug?
            next true
          end
          problem_leads << lead_settable
        end
        false
      end

      filtered_problems.sort(&comparator).each do |new_problem|
        # TODO: deal with circuits
        new_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
        new_trail.trail = new_problem.path[0...-1]
        #log.debug "Enqueuing #{new_trail.to_shorthand} and [#{new_second_part.collect{|o| o.to_shorthand}.join(',') }]" if log.debug?
        stack.push [new_trail, new_second_part]
      end
    end
  end
end
|
721
|
+
|
722
|
+
# Gather every path yielded by each_path into an Array, preserving the
# order in which each_path yields them.
def paths
  collected = []
  each_path { |trail| collected << trail }
  collected
end
|
729
|
+
|
730
|
+
# True when @converging_oriented_node_settable holds any non-nil value,
# false while it is still nil.
def converged?
  return false if @converging_oriented_node_settable.nil?

  true
end
|
733
|
+
|
734
|
+
# Return the OrientedNode that converges this bubble, behaviour
|
735
|
+
# undefined if bubble is not converged
|
736
|
+
def converging_oriented_node
  # Look up the problems stored under the converging node's settable key,
  # take the first of them, and hand back the final element of its path.
  problems_at_convergence = @known_problems[@converging_oriented_node_settable]
  leading_problem = problems_at_convergence[0]
  leading_problem.path[-1]
end
|
739
|
+
|
740
|
+
# Render this bubble as a single string: the shorthand of each member
# joined with '|' and wrapped in braces. A converged bubble lists the
# shorthand of every path in sorted order; otherwise the problems in
# @queue are listed in the order @queue.each yields them.
def to_shorthand
  shorts =
    if converged?
      paths.collect { |path| path.to_shorthand }.sort
    else
      queued = []
      @queue.each { |problem| queued << problem.to_shorthand }
      queued
    end
  "{#{shorts.join('|') }}"
end
|
751
|
+
|
752
|
+
# Flip the @is_reverse flag (an unset flag counts as false) and return
# the new value. each_path consults this flag to decide whether to
# reverse the paths it yields.
def reverse!
  @is_reverse = @is_reverse ? false : true
end
|
756
|
+
|
757
|
+
# This doesn't make sense unless this is a converged bubble and the index == -1
|
758
|
+
# because otherwise there are multiple answers
|
759
|
+
def [](index)
  # Only the final position is well defined. A bare `raise` here would
  # surface as an uninformative "unhandled exception", so say what went
  # wrong. The exception class is still RuntimeError.
  unless index == -1
    raise "only index -1 (the converging node) is supported, got #{index.inspect}"
  end

  # Rebuild the oriented node from the two elements of the stored
  # settable. NOTE(review): this fails on nil if the bubble has not
  # converged yet -- confirm callers check converged? first.
  settable = @converging_oriented_node_settable
  Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(settable[0], settable[1])
end
|
766
|
+
|
767
|
+
# Return one trail that exemplifies the paths through this bubble.
|
768
|
+
# Current method of path selection is simply greedy, taking the highest coverage node
|
769
|
+
# at each fork (or failing that the node with the lower node_id).
|
770
|
+
def reference_trail(max_cycles = @max_cycles)
  raise "reference_trail requires a converged bubble" unless converged?

  converging_onode = converging_oriented_node
  log.debug "Finding reference trail from node #{converging_onode.node.node_id}" if log.debug?

  # Forward the requested cycle limit to each_path. Previously the
  # max_cycles argument was accepted but silently ignored. A nil limit
  # is not forwarded, so each_path falls back to its own default.
  options = max_cycles.nil? ? {} : {:max_cycles => max_cycles}

  # The reference trail is simply the first path that each_path yields;
  # stop iterating as soon as it has been captured.
  first_path = nil
  each_path(options) do |path|
    first_path = path
    break
  end

  first_path
end
|
785
|
+
|
786
|
+
# Does this (converged) bubble contain any circuits?
|
787
|
+
def circuitous?
  raise unless converged?
  # Answer from the cached flag once it has been determined.
  return @circuitous unless @circuitous.nil?

  # each_path sets @circuitous as a side effect when it meets a cycle, so
  # walk the paths with no cycles allowed and stop at the first sign of one.
  each_path({:max_cycles => 0}) { |_path| break if @circuitous }
  # Still unset after the walk means no cycle was seen.
  @circuitous ||= false
end
|
795
|
+
|
796
|
+
# Coverage of a bubble is the coverage of each node in the bubble
|
797
|
+
# each weighted by their length
|
798
|
+
def coverage
  # Length-weighted mean: accumulate coverage * length for each node and
  # divide by the summed length. The float accumulator keeps the final
  # division in floating point.
  weighted_total = 0.0
  total_length = 0
  oriented_nodes do |onode|
    span = onode.node.length_alone
    weighted_total += onode.node.coverage * span
    total_length += span
  end
  weighted_total / total_length
end
|
808
|
+
end
|
809
|
+
|
810
|
+
# A single problem record used by the path search: a path plus
# bookkeeping attributes. Only path and ubiquitous_oriented_nodes are
# initialised here; the remaining accessors start out nil.
class DynamicProgrammingProblem
  attr_accessor :path, :ubiquitous_oriented_nodes, :visited_oriented_nodes, :distance, :circular_path_detected

  # Start with an empty path and an empty set of ubiquitous nodes.
  def initialize
    @ubiquitous_oriented_nodes = Set.new
    @path = []
  end

  # The settable form of the last element of the path.
  def to_settable
    @path[-1].to_settable
  end

  # Debug representation: the shorthand prefixed with this object's id.
  def to_s
    node_labels = ubiquitous_node_labels
    return "DPP #{self.object_id}: #{@path.to_shorthand}/#{node_labels.join(',') }/#{distance}"
  end

  # Compact representation: path shorthand, ubiquitous nodes and distance
  # separated by '/'.
  def to_shorthand
    node_labels = ubiquitous_node_labels
    "#{@path.to_shorthand}/#{node_labels.join(',') }/#{distance}"
  end

  private

  # One label per ubiquitous node: its first element followed by 's' when
  # its second element equals START_IS_FIRST, and by 'e' otherwise.
  def ubiquitous_node_labels
    @ubiquitous_oriented_nodes.collect do |settabled|
      suffix =
        if settabled[1] == Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
          's'
        else
          'e'
        end
      "#{settabled[0] }#{suffix}"
    end
  end
end
|
836
|
+
|
837
|
+
# An Array subclass that mixes in Comparable, so instances gain the
# ordering operators (<, <=, >, >=, between?) built on Array#<=>.
class ComparableArray < Array
  include(Comparable)
end
|
840
|
+
|
841
|
+
# Exception type named for the detection of a circuitous path; its raise
# sites are not visible in this part of the file.
# NOTE(review): this inherits directly from Exception, so a bare `rescue`
# (which only catches StandardError) will not catch it -- confirm that is
# intended before changing the superclass.
class CircuitousPathDetected < Exception
end
|
842
|
+
end
|