finishm 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'ds'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
module Bio
|
5
|
+
module AssemblyGraphAlgorithms
|
6
|
+
class GraphExplorer
|
7
|
+
# Return all paths that emanate from the end of the given initial path,
# found by a depth first search that forks at every node with more than
# one neighbour.
#
# graph:: handed unchanged to +neighbours_of_last_node+ on each path
# initial_path:: the trail to extend. It is wrapped without being copied,
#   so it may be extended in place while the search runs.
# leash_length:: a path whose +length_in_bp+ exceeds this is terminated;
#   pass nil to explore without a leash.
#
# Returns an Array of ExplorationPath objects whose +termination_type+ is
# 'Leashed', 'Dead end / coverage' or 'Loop'.
def explore_from_node(graph, initial_path, leash_length)
  depth_first_search_stack = DS::Stack.new
  depth_first_search_stack.push ExplorationPath.new(initial_path)
  found_paths = []

  # Keep going while there are paths left to explore; the loop stops as
  # soon as pop hands back a falsy value.
  while (current_path = depth_first_search_stack.pop)
    if !leash_length.nil? && current_path.path.length_in_bp > leash_length
      current_path.termination_type = 'Leashed'
      found_paths.push current_path
      next
    end

    neighbours = current_path.path.neighbours_of_last_node(graph)
    if neighbours.empty?
      current_path.termination_type = 'Dead end / coverage'
      found_paths.push current_path
      next
    end

    # Only acyclic paths are followed for the moment. A neighbour already on
    # the path closes a loop: record a terminated copy and do not follow it.
    neighbours_to_add = []
    neighbours.each do |oriented_neighbour|
      if current_path.include?(oriented_neighbour)
        new_path = current_path.copy
        new_path.add_node oriented_neighbour
        new_path.termination_type = 'Loop'
        found_paths.push new_path
      else
        neighbours_to_add.push oriented_neighbour
      end
    end

    # Fork once per remaining neighbour. The final neighbour reuses
    # current_path itself rather than a copy; every copy is taken before
    # that happens, so each fork starts from the unextended path.
    last_index = neighbours_to_add.length - 1
    neighbours_to_add.each_with_index do |oriented_neighbour, i|
      next_path = i == last_index ? current_path : current_path.copy
      next_path.add_node oriented_neighbour
      depth_first_search_stack.push next_path
    end
  end

  found_paths
end
|
56
|
+
|
57
|
+
class ExplorationPath
|
58
|
+
attr_accessor :path, :set_of_nodes, :termination_type
|
59
|
+
|
60
|
+
def initialize(path)
|
61
|
+
@path = path
|
62
|
+
@set_of_nodes = Set.new path.collect{|n| n.to_settable}
|
63
|
+
end
|
64
|
+
|
65
|
+
def include?(oriented_node)
|
66
|
+
@set_of_nodes.include?(oriented_node.to_settable)
|
67
|
+
end
|
68
|
+
|
69
|
+
def add_node(onode)
|
70
|
+
path.add_oriented_node onode
|
71
|
+
@set_of_nodes << onode.to_settable
|
72
|
+
end
|
73
|
+
|
74
|
+
def copy
|
75
|
+
anew = ExplorationPath.new @path.copy
|
76
|
+
return anew
|
77
|
+
end
|
78
|
+
|
79
|
+
def to_s
|
80
|
+
@path.collect{|on| on.node_id}.join(',')
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
@@ -0,0 +1,315 @@
|
|
1
|
+
require 'bio-velvet'
|
2
|
+
require 'bio'
|
3
|
+
require 'pry'
|
4
|
+
|
5
|
+
class Bio::FinishM::GraphGenerator
|
6
|
+
include Bio::FinishM::Logging
|
7
|
+
|
8
|
+
DEFAULT_OPTIONS = {
  :velvet_kmer_size => 51,
  :assembly_coverage_cutoff => 3.5,
}

# Register the assembly-related command line flags on option_parser.
# Parsed values are written into the options Hash.
#
# NOTE(review): DEFAULT_OPTIONS is merged over +options+, so any
# :velvet_kmer_size or :assembly_coverage_cutoff already present in
# +options+ is replaced by the default - confirm callers expect this.
#
# option_parser:: an object responding to +on+ in the manner of OptionParser
# options:: Hash that is modified in place
def add_options(option_parser, options)
  options.update(DEFAULT_OPTIONS)

  # The help strings are built here, so they show the defaults set just above.
  kmer_help = "when assembling, use this kmer length [default: #{options[:velvet_kmer_size] }]"
  cutoff_help = "Require this much coverage in each node, all other nodes are removed [default: #{options[:assembly_coverage_cutoff] }]"

  option_parser.on("--assembly-kmer NUMBER", kmer_help) { |value| options[:velvet_kmer_size] = value.to_i }
  option_parser.on("--assembly-coverage-cutoff NUMBER", cutoff_help) { |value| options[:assembly_coverage_cutoff] = value.to_f }
  option_parser.on("--post-assembly-coverage-cutoff NUMBER", "Require this much coverage in each node, implemented after assembly [default: not used]") { |value|
    options[:post_assembly_coverage_cutoff] = value.to_f
  }
  option_parser.on("--velvet-directory PATH", "Output assembly intermediate files to this directory [default: use temporary directory, delete afterwards]") { |value|
    options[:output_assembly_path] = value
  }
  option_parser.on("--already-assembled-velvet-directory PATH", "If an assembly directory has been specified previously with --velvet-directory, re-use this assembly rather than re-doing the assembly [default: off]") { |value|
    options[:previous_assembly] = value
  }
end
|
31
|
+
|
32
|
+
# Generate a ProbedGraph object, given one or more 'probe sequences'
# and metagenomic reads. This is a rather large method, but seems to
# be approximately repeated in different applications of FinishM, so
# creating it for DRY purposes.
#
# probe_sequences: DNA sequences (as String objects whose direction points to the outsides of contigs)
# read_inputs: a ReadInput object, containing the information to feed to velveth.
#   When an assembly is run, its fasta_singles is reassigned so that the
#   temporary probe file comes first.
#
# options (this Hash is modified: :parse_sequence_file is set to true if it was unset):
# :probe_reads: a list of sequence numbers (numbering as per velvet Sequence file)
# :probe_read_names: a list of sequence names (not IDs) that are probes (convert the names to IDs using the CnyUnifiedSeqNames file). There may not be a one to one correspondence of these read names and the probe reads returned in the ProbedGraph since reads can map to multiple sequence IDs.
# :velvet_kmer_size: kmer
# :assembly_coverage_cutoff: coverage cutoff for nodes
# :post_assembly_coverage_cutoff: apply this coverage cutoff to nodes after parsing assembly
# :output_assembly_path: write assembly to this directory
# :previous_assembly: a velvet directory from a previous run of the same probe sequences and reads. (Don't re-assemble)
# :use_textual_sequence_file: by default, a binary sequence file is used. Set this true to get velvet to generate the Sequences file
# :remove_unconnected_nodes: delete nodes from the graph that are not connected to the probe nodes
# :graph_search_leash_length: when :remove_unconnected_nodes'ing, use this leash length
# :dont_parse_noded_reads: if true, skip parsing noded reads (ie the positions of the reads in the graph)
# :dont_parse_reads: if true, skip parsing reads (ie the sequences of the reads themselves)
#
# Returns the populated Bio::FinishM::ProbedGraph. Raises a RuntimeError
# when the velvet version is not the required one, when the kmer size or
# coverage cutoff is missing, when the probe sequences differ from those
# in a re-used assembly, or when probes cannot be located in the graph.
def generate_graph(probe_sequences, read_inputs, options={})
  options[:parse_sequence_file] ||= true
  graph = nil
  read_probing_graph = nil
  finishm_graph = Bio::FinishM::ProbedGraph.new

  log.debug "Options for generate_graph: #{options}" if log.debug?

  velvet_binary_folder = File.join(File.dirname(__FILE__),'..','..','ext','src')
  log.debug "Using velvet binary folder #{velvet_binary_folder}" if log.debug?

  velvet_result = nil

  # Without explicit probe read IDs, the probes are taken to be the first
  # probe_sequences.length sequences (1-based).
  probe_read_ids = nil
  if options[:probe_reads]
    probe_read_ids = options[:probe_reads]
  else
    probe_read_ids = Set.new((1..probe_sequences.length))
  end

  if options[:previous_assembly].nil? #If assembly has not already been carried out
    Tempfile.open('probes.fa') do |tempfile|
      50.times do # Do 50 times to make sure that velvet doesn't throw out parts of the graph that contain this contig
        probe_sequences.each_with_index do |probe, i|
          tempfile.puts ">probe#{i}"
          tempfile.puts probe
        end
      end
      tempfile.close

      # The probe file goes first so that the probes are the first sequences velvet is given.
      singles = read_inputs.fasta_singles
      if singles and !singles.empty?
        read_inputs.fasta_singles = [tempfile.path, singles].flatten
      else
        read_inputs.fasta_singles = [tempfile.path]
      end
      # File.read closes the file itself; File.open(...).read left the handle open.
      log.debug "Inputting probes into the assembly:\n#{File.read(tempfile.path)}" if log.debug?

      runner = Bio::Velvet::Runner.new
      required_version = '1.2.10-wwood_finishm'
      found_version = runner.binary_version(File.join(velvet_binary_folder, 'velveth'))
      if found_version != required_version
        raise "Detected velvet version incompatible with FinishM: #{found_version}, expected #{required_version} which is available from https://github.com/wwood/velvet (on branch less_clipping)"
      end

      log.info "Assembling sampled reads with velvet"
      raise "Need to specify -cov_cutoff" if options[:assembly_coverage_cutoff].nil?
      raise "Need to specify a kmer size" if options[:velvet_kmer_size].nil?
      # Bit of a hack, but have to use -short1 as the anchors because then start and end anchors will have node IDs 1,2,... etc.
      use_binary = options[:use_textual_sequence_file] ? '' : '-create_binary'
      velvet_result = runner.velvet(
        options[:velvet_kmer_size],
        "#{read_inputs.velvet_read_arguments} #{use_binary}",
        "-read_trkg yes -cov_cutoff #{options[:assembly_coverage_cutoff] } -tour_bus no -read_to_node_binary yes",
        :output_assembly_path => options[:output_assembly_path],
        :velveth_path => File.join(velvet_binary_folder, 'velveth'),
        :velvetg_path => File.join(velvet_binary_folder, 'velvetg'),
      )
      if log.debug?
        log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
        log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
        log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
        log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
      end
      log.info "Finished running assembly"
      finishm_graph.velvet_result_directory = velvet_result.result_directory
    end
  else
    log.info "Using previous assembly stored in #{options[:previous_assembly] }"
    velvet_result = Bio::Velvet::Result.new
    velvet_result.result_directory = options[:previous_assembly]
    finishm_graph.velvet_result_directory = velvet_result.result_directory
  end

  # Check that the probe reads given are present in the assembly passed here
  unless options[:dont_parse_reads]
    sequence_store = parse_velvet_binary_reads(velvet_result.result_directory)
    finishm_graph.velvet_sequences = sequence_store
    if !check_probe_sequences(probe_sequences, sequence_store)
      raise "Probe sequences changed since previous velvet assembly!"
    end
  end

  log.info "Parsing the graph output from velvet"
  opts = {
    # noded reads are parsed in via C, if they are wanted at all
    :dont_parse_noded_reads => true
  }
  bio_velvet_graph = Bio::Velvet::Graph.parse_from_file(
    File.join(velvet_result.result_directory, 'LastGraph'),
    opts
  )
  log.info "Finished parsing graph: found #{bio_velvet_graph.nodes.length} nodes and #{bio_velvet_graph.arcs.length} arcs"

  if options[:dont_parse_noded_reads]
    graph = bio_velvet_graph
  else
    log.info "Beginning parse of graph using velvet's parsing C code.."
    read_probing_graph = Bio::Velvet::Underground::Graph.parse_from_file File.join(velvet_result.result_directory, 'LastGraph')
    log.info "Completed velvet code parsing velvet graph"

    # Make the two graphs into a hybrid one
    graph = Bio::FinishM::HybridGraph.new(bio_velvet_graph, read_probing_graph)
  end
  finishm_graph.graph = graph

  # Find the anchor nodes again
  anchor_sequence_ids = probe_read_ids.to_a.sort
  endings = []
  unless probe_read_ids.empty? and options[:probe_read_names].nil? #don't bother trying to find probes if none exists
    # Convert read names to read IDs if required
    if options[:probe_read_names]
      # Probe reads are given as names, not IDs. What are the corresponding probes then?
      entries = Bio::Velvet::CnyUnifiedSeqNamesFile.extract_entries_using_grep_hack(
        File.join(velvet_result.result_directory, 'CnyUnifiedSeq.names'),
        options[:probe_read_names]
      )
      anchor_sequence_ids = []
      double_counts = 0
      options[:probe_read_names].each do |name| #maintain order of them as they are specified in the original array parameter
        if entries[name].empty?
          raise "Unable to find probe `#{name}' in the probe reads file - was it included in the assembly?"
        elsif entries[name].length > 2
          raise "Found >2 sequences named #{name} in the assembly, being conservative and not continuing"
        else
          entries[name].each do |res|
            anchor_sequence_ids.push res.read_id
          end
          if entries[name].length == 2
            double_counts += 1
            log.debug "Found 2 sequences for #{name}" if log.debug?
          end
        end
      end
      if double_counts > 0
        log.info "#{double_counts} reads were found twice (likely as pairs), including both as probes"
      end
      log.info "Recovered #{anchor_sequence_ids.length} sequences using their names" if log.info?
    end

    # Parse the read to node structure
    log.info "Reading ReadToNode.bin file.." if log.info?
    finishm_graph.read_to_nodes = Bio::FinishM::ReadToNode.new(File.join(velvet_result.result_directory, 'ReadToNode.bin'))

    finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
    log.info "Finding probe nodes in the assembly"
    c_graph_endings = finder.find_probes_from_read_to_node(finishm_graph.graph, finishm_graph.read_to_nodes, anchor_sequence_ids)
    log.debug "Converting probe nodes found in C graph to Ruby analogues and adding to Ruby-parsed graph"
    endings = c_graph_endings.collect do |node_direction_read|
      if node_direction_read.empty?
        # No probe found
        []
      else #found a node.
        #equivalent node
        node = graph.nodes[node_direction_read[0].node_id]
        #equivalent direction
        direction = node_direction_read[1]
        #equivalent noded read
        nr = Bio::Velvet::Graph::NodedRead.new
        cnr = node_direction_read[2]
        nr.read_id = cnr.read_id
        nr.offset_from_start_of_node = cnr.offset_from_start_of_node
        nr.start_coord = cnr.start_coord
        nr.direction = direction
        # collect
        [node, direction, nr]
      end
    end
  end
  # An unfound probe contributes an empty Array above, so its entry is nil in each of these.
  finishm_graph.probe_nodes = endings.collect{|array| array[0]}
  finishm_graph.probe_node_directions = endings.collect{|array| array[1]}
  finishm_graph.probe_node_reads = endings.collect{|array| array[2]}

  # Check to make sure the probe sequences map to nodes in the graph
  if finishm_graph.completely_probed?
    if log.info?
      found_all = true
      num_found = 0
      finishm_graph.probe_nodes.each_with_index do |probe,i|
        if probe.nil?
          found_all = false
          log.debug "Unable to recover probe ##{i+1}, perhaps this will cause problems, but proceding optimistically"
        else
          num_found += 1
        end
      end
      if found_all
        if finishm_graph.probe_nodes.empty?
          log.debug "No probes specified, so didn't find any"
        else
          log.info "Found all anchoring nodes in the graph."
        end
      else
        log.info "Found #{num_found} of #{finishm_graph.probe_nodes.length} anchoring nodes in the graph, ignoring the rest"
      end
    end
  else
    raise "Unable to find all anchor reads from the assembly, cannot continue. This is probably an error with this script, not you. Probes not found: #{finishm_graph.missing_probe_indices.inspect}"
  end

  if options[:post_assembly_coverage_cutoff]
    log.info "Removing nodes with coverage < #{options[:post_assembly_coverage_cutoff] } from graph.."
    original_num_nodes = graph.nodes.length
    original_num_arcs = graph.arcs.length
    filter = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
    filter.remove_low_coverage_nodes(graph,
      options[:post_assembly_coverage_cutoff],
      :whitelisted_sequences => Set.new(anchor_sequence_ids)
    )
    log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
  end

  if options[:remove_unconnected_nodes]
    if options[:graph_search_leash_length]
      log.info "Removing nodes unconnected to probe nodes from the graph using leash #{options[:graph_search_leash_length] }.."
    else
      log.info "Removing nodes unconnected to probe nodes from the graph without using a leash.."
    end
    original_num_nodes = graph.nodes.length
    original_num_arcs = graph.arcs.length
    filter = Bio::AssemblyGraphAlgorithms::ConnectivityBasedGraphFilter.new
    filter.remove_unconnected_nodes(
      graph,
      finishm_graph.probe_nodes.reject{|n| n.nil?},
      :leash_length => options[:graph_search_leash_length]
    )
    log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
  end

  return finishm_graph
end
|
287
|
+
|
288
|
+
# Open the binary sequence store (the 'CnyUnifiedSeq' file) found in a
# velvet result directory.
#
# velvet_result_directory:: path of the directory holding the velvet output
#
# Returns the Bio::Velvet::Underground::BinarySequenceStore.
def parse_velvet_binary_reads(velvet_result_directory)
  store_path = File.join(velvet_result_directory, 'CnyUnifiedSeq')
  log.info "Reading in the actual sequences of all reads from #{store_path}"
  store = Bio::Velvet::Underground::BinarySequenceStore.new(store_path)
  log.info "Read in #{store.length} sequences"
  store
end
|
296
|
+
|
297
|
+
# When re-using an assembly, sometimes need to make
# sure that the probe sequences used previously are the same
# as what is given this time. Given an Array of probe sequences
# and a sequence store, check that the probe sequences are
# consistent with the stored ones.
#
# probe_sequences:: Array of probe sequence Strings, or nil to skip the check
# sequence_store:: object indexed with +[]+ by 1-based sequence ID.
#                  Probe number i (0-based) is compared with entry i+1.
#
# The comparison ignores case. Returns true when every probe matches
# (or probe_sequences is nil), otherwise logs an error and returns
# false. A probe whose stored entry is nil counts as a mismatch.
def check_probe_sequences(probe_sequences, sequence_store)
  return true if probe_sequences.nil?

  probe_sequences.each_with_index do |probe, i|
    log.debug "Checking probe sequence \##{i+1}" if log.debug?
    stored = sequence_store[i+1]
    # A nil stored sequence used to raise NoMethodError on upcase; report it as a change instead.
    if stored.nil? || stored.upcase != probe.upcase
      log.error "Probe sequence \##{i+1} has changed - perhaps the wrong velvet assembly directory was specified, or a fresh assembly is required?"
      return false
    end
  end
  log.debug "Presence of #{probe_sequences.length} probe sequences verified"
  return true
end
|
315
|
+
end
|
@@ -0,0 +1,355 @@
|
|
1
|
+
require 'ds'
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
class Bio::AssemblyGraphAlgorithms::HeightFinder
|
5
|
+
include Bio::FinishM::Logging
|
6
|
+
|
7
|
+
# Visit nodes in range and determine heights.
#
# A depth-first walk from the initial nodes. Tips (nodes with no
# neighbours) get height 0; any other node gets 1 + the maximum height
# of its neighbours whose height is known by the time the walk returns
# to it. A node with no solved neighbour at that point is left without
# a height.
#
# graph:: passed to +next_neighbours+ of each oriented node
# initial_nodes:: oriented nodes to start from
# options:
# :range:: if given, only nodes in this collection are visited
# :reverse:: if true, each initial node is reversed before starting
#
# Returns two values: an Array indexed by height, each entry an Array
# of the traversal nodes found at that height, and an Array of the
# closed cycles found (each an Array of oriented nodes).
def traverse(graph, initial_nodes, options={})
  by_height = []
  traversal_nodes = {}
  cycles = {}
  nodes_in_retrace_phase = Set.new

  # depth-first so stack
  stack = DS::Stack.new
  initial_nodes.each do |onode|
    next if options[:range] and options[:range].none?{|other| other == onode.node }
    traversal_node = CyclicTraversalNode.new
    traversal_node.onode = options[:reverse] ? onode.reverse : onode
    traversal_node.nodes_in = []
    traversal_nodes[traversal_node.onode.to_settable] = traversal_node
    stack.push traversal_node
  end

  while traversal_node = stack.pop
    settable = traversal_node.onode.to_settable

    log.debug "visiting #{traversal_node.describe}." if log.debug?

    # Consider node solved if height is known.
    if not traversal_node.height.nil?
      log.debug "Height of #{traversal_node.describe} is known. Skip." if log.debug?
      next
    end

    # find neighbours
    neighbours = traversal_node.nodes_out
    if neighbours.nil?
      neighbours = traversal_node.onode.next_neighbours(graph)
      if options[:range]
        neighbours.reject!{|onode| options[:range].none?{|other| other == onode.node}} #not in defined range
      end

      # Get or create traversal version of node
      neighbours = neighbours.collect do |onode|
        nbr_settable = onode.to_settable
        traversal_nbr = traversal_nodes[nbr_settable]
        if traversal_nbr.nil?
          traversal_nbr = CyclicTraversalNode.new
          traversal_nbr.onode = onode
          traversal_nbr.nodes_in = []
          traversal_nodes[nbr_settable] = traversal_nbr
        end
        traversal_nbr
      end

      #remember neighbours
      traversal_node.nodes_out = neighbours
    end

    # Can we solve the node?
    if neighbours.empty? #check for a tip
      log.debug "#{traversal_node.describe} is a tip." if log.debug?
      traversal_node.height = 0
      (by_height[0] ||= []).push(traversal_node)
      log.debug "Found height '0' for #{traversal_node.describe}." if log.debug?
      next
    end

    if nodes_in_retrace_phase.include? settable
      log.debug "Retracing back to #{traversal_node.describe}." if log.debug?

      # Neighbours should have been explored
      # Are neighbours involved in cycles?
      cyclic_neighbours = neighbours.reject{|node| node.cycles.nil?}
      if not cyclic_neighbours.empty?
        # current node is in a cycle if a neighbour is in an unclosed cycle
        log.debug "Found cyclic neighbours #{cyclic_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        cyclic_neighbours.each do |node|
          node.cycles.each do |cycle|
            log.debug "Merging cycle #{cycle.onodes.collect{|onode| onode.to_shorthand}.join(',')}." if log.debug?
            new_cycle = traversal_node.merge_unclosed_cycle cycle.copy
            if not new_cycle.nil? and new_cycle.closed?
              log.debug "Cycle completes at #{traversal_node.describe}." if log.debug?
              new_cycle_key = new_cycle.to_settable
              if cycles.has_key? new_cycle_key
                log.debug "Already seen this cycle." if log.debug?
              else
                cycles[new_cycle_key] = new_cycle.onodes
              end
            end
          end
        end
      end

      # Unsolved neighbours imply a closed cyclic path.
      # Are neighbours unsolved?
      solved_neighbours = neighbours.reject{|node| node.height.nil?}
      unless solved_neighbours.empty?
        log.debug "We know the heights of neighbours #{solved_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        # Compute height from solved neighbours
        height = solved_neighbours.map{|node| node.height}.max + 1
        log.debug "Found height '#{height}' for #{traversal_node.describe}." if log.debug?
        traversal_node.height = height
        (by_height[height] ||= []).push(traversal_node)
      end
      # If no solved neighbours, leave unsolved

      # Move out of retrace phase
      nodes_in_retrace_phase.delete settable
      log.debug "Finished retracing #{traversal_node.describe}." if log.debug?
      next
    end

    # Move current node to retrace phase, before checking for retracing neighbours in case is own neighbour
    nodes_in_retrace_phase << settable

    # Look for currently retracing neighbours and initiate cycles
    retracing_neighbours = neighbours.select{|node| nodes_in_retrace_phase.include? node.onode.to_settable}
    if not retracing_neighbours.empty?
      log.debug "Initiating cycles for neighbours #{retracing_neighbours.collect{|node| node.describe}.join(',')} currently retracing." if log.debug?
      # initiate cycles for each retracing neighbour
      retracing_neighbours.each{|node| traversal_node.initiate_cycle(node.onode)}
    end

    # Return node stack and push neighbours
    stack.push traversal_node
    log.debug "Pushing #{traversal_node.describe} in retrace mode." if log.debug?
    neighbours.each do |node|
      node_settable = node.onode.to_settable

      # Note the parent of neighbour unless already known. The recorded
      # parents are compared with the current node; comparing them with the
      # neighbour itself meant a self-looping neighbour never recorded
      # later parents.
      nodes_in = node.nodes_in
      if not nodes_in.any?{|nbr| nbr.onode == traversal_node.onode}
        nodes_in.push traversal_node
      end

      if nodes_in_retrace_phase.include? node_settable
        # A currently retracing neighbour implies a cycle, cut it off here
        log.debug "Neighbour #{node.describe} is retracing. Not revisiting." if log.debug?
      else
        log.debug "Pushing neighbour #{node.describe}." if log.debug?
        stack.push node
      end
    end
  end
  return by_height, cycles.values
end
|
161
|
+
|
162
|
+
# An oriented node as seen during a height traversal: the node itself,
# its height once known, and its traversal-node parents and children.
class TraversalNode
  attr_accessor :onode, :height, :nodes_in, :nodes_out

  # Shorthand description of the wrapped oriented node.
  def describe
    @onode.to_shorthand
  end

  # Node ID of the wrapped oriented node.
  def node_id
    @onode.node_id
  end
end

# A TraversalNode that also remembers the cycle paths passing through it.
class CyclicTraversalNode < TraversalNode
  attr_accessor :cycles

  # Begin a new cycle path that ends at the given oriented node, and
  # merge this node into it. Returns what merge_unclosed_cycle returns.
  def initiate_cycle(onode)
    fresh = CyclePath.new
    fresh.onodes = [onode]
    merge_unclosed_cycle(fresh)
  end

  # Add this node to an unclosed cycle path. If the path's last node is
  # this node the path is marked closed, otherwise this node is
  # prepended. The path is then recorded in +cycles+ and returned.
  # Returns nil, recording nothing, if the path was already closed.
  def merge_unclosed_cycle(cycle)
    return if cycle.closed?

    if cycle.onodes.last == @onode
      cycle.closed = true
    else
      cycle.onodes.unshift @onode
    end

    @cycles = [] if @cycles.nil?
    @cycles.push(cycle)
    cycle
  end

  # An ordered list of oriented nodes, with a flag for whether it closes.
  class CyclePath
    attr_accessor :onodes, :closed

    def closed?
      @closed == true
    end

    # New CyclePath with its own Array of the same oriented nodes.
    def copy
      duplicate = CyclePath.new
      duplicate.onodes = @onodes.slice(0..-1)
      duplicate.closed = @closed
      duplicate
    end

    # return sorted list of onode settables
    def to_settable
      keys = @onodes.collect { |onode| onode.to_settable }
      keys.sort do |a, b|
        by_first = a[0] <=> b[0]
        next by_first unless by_first == 0

        # Tie on the first element: the START_IS_FIRST entry sorts first.
        a[1] == Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode::START_IS_FIRST ? -1 : 1
      end
    end
  end
end
|
224
|
+
|
225
|
+
|
226
|
+
# maximum paths
#
# by_height:: Array indexed by height, each entry an Array of traversal
#             nodes responding to +onode+, +nodes_in+, +nodes_out+ and +describe+
#
# Each tip (height 0) counts as 1. Every higher node counts the sum over
# those of its +nodes_out+ that already have a count. The result is the
# sum of the counts of the nodes with no +nodes_in+, or nil when there
# are no such nodes.
def max_paths_through(by_height)
  paths_from = {}

  by_height.each_with_index do |level_nodes, level|
    log.debug "At height #{level}." if log.debug?

    level_nodes.each do |tnode|
      if level == 0 # tips
        log.debug "Counted maximum of 1 path to #{tnode.describe}." if log.debug?
        paths_from[tnode.onode.to_settable] = 1
      else
        key = tnode.onode.to_settable
        child_counts = tnode.nodes_out.collect { |child| paths_from[child.onode.to_settable] }.compact
        log.debug "Found neighbours of #{tnode.describe} with maximum paths #{child_counts.join(',')}." if log.debug?
        paths_from[key] = child_counts.reduce(:+)
        log.debug "Counted maximum of #{paths_from[key]} paths to #{tnode.describe}." if log.debug?
      end
    end
  end

  # Get the graph roots (which are nodes with no parents) and add max_paths_from for each to get graph total
  roots = by_height.flatten.select { |tnode| tnode.nodes_in.empty? }
  root_keys = roots.collect { |tnode| tnode.onode.to_settable }
  log.debug "Found graph roots #{root_keys.collect{|settable| settable[0]}.join(',')} with maximum paths #{root_keys.collect{|key| paths_from[key]}.join(',')}." if log.debug?
  total = root_keys.map { |key| paths_from[key] }.reduce(:+)
  log.debug "Counted maximum of #{total} through graph." if log.debug?
  return total
end
|
255
|
+
|
256
|
+
# minimum paths
#
# by_height:: Array indexed by height, each entry an Array of traversal
#             nodes responding to +onode+, +nodes_out+ and +describe+
#
# Walks the levels from height 0 upwards, keeping a set of 'live' nodes:
# the nodes of a level become live, then (above height 0) the children
# of that level's nodes stop being live. Returns the largest size the
# live set had at the end of any level (0 for an empty by_height).
def min_paths_through(by_height)
  alive = Set.new
  peak = 0

  by_height.each_with_index do |level_nodes, level|
    log.debug "At height #{level}." if log.debug?

    # nodes at current level become live
    level_nodes.each do |tnode|
      key = tnode.onode.to_settable
      log.debug "Setting #{tnode.describe} as live." if log.debug?
      alive << key
    end

    unless level == 0
      #children of nodes at current level are no longer live
      level_nodes.each do |tnode|
        tnode.nodes_out.each do |child|
          log.debug "Setting child #{child.describe} of live node #{tnode.describe} as inactive." if log.debug?
          alive.delete(child.onode.to_settable)
        end
      end
    end

    log.debug "There are currently #{alive.length} nodes alive. Max is #{peak}." if log.debug?
    next unless alive.length > peak

    #track the maximum live nodes at any level
    log.debug "Updating max to #{alive.length}." if log.debug?
    peak = alive.length
  end

  return peak
end
|
288
|
+
|
289
|
+
# Work out which oriented nodes form the entry ("start") and exit ("end")
# points of a set of nodes.
#
# Every node in range is considered in both orientations. An oriented node
# counts as "unreached" when it is never returned by +next_neighbours+ of any
# oriented node in range. The unreached oriented nodes seed a depth-first
# walk (via a stack) along +next_neighbours+:
#
# * an unreached oriented node met on the walk is recorded as a start;
# * otherwise, if the reverse orientation of the node is unreached, that
#   reverse oriented node is recorded as an end.
#
# Each node id is processed at most once, regardless of orientation, and
# nodes outside +nodes+ are not processed or expanded.
#
# graph:: the graph handed to +next_neighbours+; also supplies the default
#         node set through +graph.nodes+.
# nodes:: optional collection of nodes restricting the range (must respond
#         to +each+, +collect+ and +include?+). Defaults to +graph.nodes+.
#
# Returns a two-element array: [start_onodes, end_onodes].
def find_oriented_edge_of_range(graph, nodes=nil)
  nodes ||= graph.nodes
  log.debug "Looking for oriented start and end points from #{nodes.collect{|n| n.node_id}.join(',')}" if log.debug?

  # Find nodes and directions which are not reachable from other nodes within range.
  # A nil value marks an oriented node that some other oriented node leads to.
  unreached_nodes = {}
  nodes.each do |node|
    [true, false].each do |direction|
      onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node, direction
      settable = onode.to_settable
      # Never overwrite an existing entry: a nil placed by a neighbour must stick
      unreached_nodes[settable] = onode unless unreached_nodes.key? settable
      onode.next_neighbours(graph).each do |oneigh|
        unreached_nodes[oneigh.to_settable] = nil
      end
    end
  end

  entry_points = unreached_nodes.values.compact
  log.debug "Found the following nodes for a particular orientation have no paths connecting to other nodes in range: #{entry_points.collect{|n| n.to_shorthand}.join(',')}" if log.debug?

  # Start from an unreachable node, and trace all paths until the reverse end of other unreachable nodes
  # are reached, which are then defined as 'end' nodes. When finished, choose a remaining non-end unreachable
  # node and repeat, stopping paths if an already seen node is encountered.
  seen_nodes = Set.new
  start_onodes = []
  end_onodes = []
  stack = DS::Stack.new
  # Push in reverse so the first entry point is the first one popped
  entry_points.reverse_each { |onode| stack.push onode }

  while (current_node = stack.pop)
    log.debug "At node #{current_node.to_shorthand}" if log.debug?

    node_id = current_node.node_id
    if seen_nodes.include?(node_id) || !nodes.include?(current_node.node)
      log.debug "Node has been seen or is out of range. Skipping..." if log.debug?
      next
    end
    seen_nodes << node_id

    current_unreached = unreached_nodes[current_node.to_settable]
    log.debug "Is current unreached? #{current_unreached}" if log.debug?
    if current_unreached
      log.debug "Defining starting node #{current_unreached.to_shorthand}" if log.debug?
      # Found start node
      start_onodes.push current_unreached
    else
      reverse_unreached = unreached_nodes[current_node.reverse.to_settable]
      log.debug "Is reverse unreached? #{reverse_unreached}" if log.debug?
      if reverse_unreached
        log.debug "Found ending node #{reverse_unreached.to_shorthand}" if log.debug?
        # Found end node
        end_onodes.push reverse_unreached
      end
    end

    current_node.next_neighbours(graph).each do |onode|
      log.debug "Adding neighbour #{onode.to_shorthand} to stack" if log.debug?
      stack.push onode
    end
  end

  [start_onodes, end_onodes]
end
|
355
|
+
end
|