finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
require 'ds'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
module Bio
|
|
5
|
+
module AssemblyGraphAlgorithms
|
|
6
|
+
class GraphExplorer
|
|
7
|
+
# Return all paths that emanate from a given node in the graph
|
|
8
|
+
def explore_from_node(graph, initial_path, leash_length)
|
|
9
|
+
# Do a simple depth first search, forking at each node. Vanilla graph traversal.
|
|
10
|
+
depth_first_search_stack = DS::Stack.new
|
|
11
|
+
first_path = ExplorationPath.new initial_path
|
|
12
|
+
depth_first_search_stack.push first_path
|
|
13
|
+
found_paths = []
|
|
14
|
+
# While there's more paths to explore
|
|
15
|
+
while current_path = depth_first_search_stack.pop
|
|
16
|
+
last = current_path.path.last
|
|
17
|
+
if !leash_length.nil? and current_path.path.length_in_bp > leash_length
|
|
18
|
+
current_path.termination_type = 'Leashed'
|
|
19
|
+
found_paths.push current_path
|
|
20
|
+
else
|
|
21
|
+
neighbours = current_path.path.neighbours_of_last_node(graph)
|
|
22
|
+
if neighbours.empty?
|
|
23
|
+
current_path.termination_type = 'Dead end / coverage'
|
|
24
|
+
found_paths.push current_path
|
|
25
|
+
else
|
|
26
|
+
neighbours_to_add = []
|
|
27
|
+
neighbours.each do |oriented_neighbour|
|
|
28
|
+
# Test for loops, I'm only interested in acyclic paths for the moment
|
|
29
|
+
if current_path.include?(oriented_neighbour)
|
|
30
|
+
#loop found, terminate path
|
|
31
|
+
new_path = current_path.copy
|
|
32
|
+
new_path.add_node oriented_neighbour
|
|
33
|
+
new_path.termination_type = 'Loop'
|
|
34
|
+
found_paths.push new_path
|
|
35
|
+
else
|
|
36
|
+
neighbours_to_add.push oriented_neighbour
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
neighbours_to_add.each_with_index do |oriented_neighbour, i|
|
|
40
|
+
# If the last neighbour is being added here, reuse the path
|
|
41
|
+
next_path = nil
|
|
42
|
+
if i == neighbours_to_add.length-1
|
|
43
|
+
next_path = current_path
|
|
44
|
+
else
|
|
45
|
+
next_path = current_path.copy
|
|
46
|
+
end
|
|
47
|
+
next_path.add_node oriented_neighbour
|
|
48
|
+
depth_first_search_stack.push next_path
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
return found_paths
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
class ExplorationPath
|
|
58
|
+
attr_accessor :path, :set_of_nodes, :termination_type
|
|
59
|
+
|
|
60
|
+
def initialize(path)
|
|
61
|
+
@path = path
|
|
62
|
+
@set_of_nodes = Set.new path.collect{|n| n.to_settable}
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def include?(oriented_node)
|
|
66
|
+
@set_of_nodes.include?(oriented_node.to_settable)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def add_node(onode)
|
|
70
|
+
path.add_oriented_node onode
|
|
71
|
+
@set_of_nodes << onode.to_settable
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def copy
|
|
75
|
+
anew = ExplorationPath.new @path.copy
|
|
76
|
+
return anew
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
def to_s
|
|
80
|
+
@path.collect{|on| on.node_id}.join(',')
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
require 'bio-velvet'
|
|
2
|
+
require 'bio'
|
|
3
|
+
require 'pry'
|
|
4
|
+
|
|
5
|
+
class Bio::FinishM::GraphGenerator
|
|
6
|
+
include Bio::FinishM::Logging
|
|
7
|
+
|
|
8
|
+
# Default assembly settings that add_options merges into the options hash.
# Frozen so that the shared constant cannot be modified by accident.
DEFAULT_OPTIONS = {
  :velvet_kmer_size => 51,
  :assembly_coverage_cutoff => 3.5,
}.freeze
|
|
12
|
+
|
|
13
|
+
# Register the assembly-related command line switches on an option parser.
#
# option_parser:: object responding to +on+ (as OptionParser does)
# options:: Hash that receives the parsed values. DEFAULT_OPTIONS is merged
#   into it first, overwriting any values already stored under those keys
#
# Returns whatever the final +on+ call returns.
def add_options(option_parser, options)
  options.update(DEFAULT_OPTIONS)

  kmer_help = "when assembling, use this kmer length [default: #{options[:velvet_kmer_size] }]"
  option_parser.on("--assembly-kmer NUMBER", kmer_help) do |value|
    options[:velvet_kmer_size] = value.to_i
  end

  cutoff_help = "Require this much coverage in each node, all other nodes are removed [default: #{options[:assembly_coverage_cutoff] }]"
  option_parser.on("--assembly-coverage-cutoff NUMBER", cutoff_help) do |value|
    options[:assembly_coverage_cutoff] = value.to_f
  end

  post_cutoff_help = "Require this much coverage in each node, implemented after assembly [default: not used]"
  option_parser.on("--post-assembly-coverage-cutoff NUMBER", post_cutoff_help) do |value|
    options[:post_assembly_coverage_cutoff] = value.to_f
  end

  directory_help = "Output assembly intermediate files to this directory [default: use temporary directory, delete afterwards]"
  option_parser.on("--velvet-directory PATH", directory_help) do |value|
    options[:output_assembly_path] = value
  end

  reuse_help = "If an assembly directory has been specified previously with --velvet-directory, re-use this assembly rather than re-doing the assembly [default: off]"
  option_parser.on("--already-assembled-velvet-directory PATH", reuse_help) do |value|
    options[:previous_assembly] = value
  end
end
|
|
31
|
+
|
|
32
|
+
# Generate a ProbedGraph object, given one or more 'probe sequences'
# and metagenomic reads. This is a rather large method, but seems to
# be approximately repeated in different applications of FinishM, so
# creating it for DRY purposes.
#
# probe_sequences: DNA sequences (as String objects whose direction points to the outsides of contigs)
# read_inputs: a ReadInput object, containing the information to feed to velveth
#
# options:
# :probe_reads: a list of sequence numbers (numbering as per velvet Sequence file)
# :probe_read_names: a list of sequence names (not IDs) that are probes (convert the names to IDs using the CnyUnifiedSeqNames file). There may not be a one to one correspondence of these read names and the probe reads returned in the ProbedGraph since reads can map to multiple sequence IDs.
# :velvet_kmer_size: kmer
# :assembly_coverage_cutoff: coverage cutoff for nodes
# :post_assembly_coverage_cutoff: apply this coverage cutoff to nodes after parsing assembly
# :output_assembly_path: write assembly to this directory
# :previous_assembly: a velvet directory from a previous run of the same probe sequences and reads. (Don't re-assemble)
# :use_textual_sequence_file: by default, a binary sequence file is used. Set this true to get velvet to generate the Sequences file
# :remove_unconnected_nodes: delete nodes from the graph that are not connected to the probe nodes
# :graph_search_leash_length: when :remove_unconnected_nodes'ing, use this leash length
# :dont_parse_noded_reads: if true, skip parsing noded reads (ie the positions of the reads in the graph)
# :dont_parse_reads: if true, skip parsing reads (ie the sequences of the reads themselves)
#
# Side effects: sets options[:parse_sequence_file] when it is unset, and,
# when a fresh assembly is run, prepends the temporary probe FASTA file to
# read_inputs.fasta_singles.
#
# Returns the populated Bio::FinishM::ProbedGraph.
#
# Raises RuntimeError when the velvet version is not the required one, when
# the kmer size or assembly coverage cutoff is missing, when the probe
# sequences differ from those stored in the assembly, when a named probe read
# is absent or found more than twice, or when the graph is not completely probed.
def generate_graph(probe_sequences, read_inputs, options={})
  options[:parse_sequence_file] ||= true
  finishm_graph = Bio::FinishM::ProbedGraph.new

  log.debug "Options for generate_graph: #{options}" if log.debug?

  velvet_binary_folder = File.join(File.dirname(__FILE__),'..','..','ext','src')
  log.debug "Using velvet binary folder #{velvet_binary_folder}" if log.debug?

  # Assigned inside the Tempfile block below, so it must exist before the block.
  velvet_result = nil

  probe_read_ids = options[:probe_reads] || Set.new((1..probe_sequences.length))

  if options[:previous_assembly].nil? #If assembly has not already been carried out
    Tempfile.open('probes.fa') do |tempfile|
      50.times do # Do 50 times to make sure that velvet doesn't throw out parts of the graph that contain this contig
        probe_sequences.each_with_index do |probe, i|
          tempfile.puts ">probe#{i}"
          tempfile.puts probe
        end
      end
      tempfile.close

      # The probes go first in the list of single-ended FASTA inputs
      singles = read_inputs.fasta_singles
      if singles && !singles.empty?
        read_inputs.fasta_singles = [tempfile.path, singles].flatten
      else
        read_inputs.fasta_singles = [tempfile.path]
      end
      # File.read closes its handle, unlike File.open(path).read
      log.debug "Inputting probes into the assembly:\n#{File.read(tempfile.path)}" if log.debug?

      runner = Bio::Velvet::Runner.new
      required_version = '1.2.10-wwood_finishm'
      found_version = runner.binary_version(File.join(velvet_binary_folder, 'velveth'))
      if found_version != required_version
        raise "Detected velvet version incompatible with FinishM: #{found_version}, expected #{required_version} which is available from https://github.com/wwood/velvet (on branch less_clipping)"
      end

      log.info "Assembling sampled reads with velvet"
      raise "Need to specify -cov_cutoff" if options[:assembly_coverage_cutoff].nil?
      raise "Need to specify a kmer size" if options[:velvet_kmer_size].nil?
      # Bit of a hack, but have to use -short1 as the anchors because then start and end anchors will have node IDs 1,2,... etc.
      use_binary = options[:use_textual_sequence_file] ? '' : '-create_binary'
      velvet_result = runner.velvet(
        options[:velvet_kmer_size],
        "#{read_inputs.velvet_read_arguments} #{use_binary}",
        "-read_trkg yes -cov_cutoff #{options[:assembly_coverage_cutoff] } -tour_bus no -read_to_node_binary yes",
        :output_assembly_path => options[:output_assembly_path],
        :velveth_path => File.join(velvet_binary_folder, 'velveth'),
        :velvetg_path => File.join(velvet_binary_folder, 'velvetg'),
      )
      if log.debug?
        log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
        log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
        log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
        log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
      end
      log.info "Finished running assembly"
      finishm_graph.velvet_result_directory = velvet_result.result_directory
    end
  else
    log.info "Using previous assembly stored in #{options[:previous_assembly] }"
    velvet_result = Bio::Velvet::Result.new
    velvet_result.result_directory = options[:previous_assembly]
    finishm_graph.velvet_result_directory = velvet_result.result_directory
  end

  # Check that the probe reads given are present in the assembly passed here
  unless options[:dont_parse_reads]
    sequence_store = parse_velvet_binary_reads(velvet_result.result_directory)
    finishm_graph.velvet_sequences = sequence_store
    if !check_probe_sequences(probe_sequences, sequence_store)
      raise "Probe sequences changed since previous velvet assembly!"
    end
  end

  log.info "Parsing the graph output from velvet"
  opts = {
    # noded reads are parsed in via C, if they are wanted at all
    :dont_parse_noded_reads => true
  }
  bio_velvet_graph = Bio::Velvet::Graph.parse_from_file(
    File.join(velvet_result.result_directory, 'LastGraph'),
    opts
  )
  log.info "Finished parsing graph: found #{bio_velvet_graph.nodes.length} nodes and #{bio_velvet_graph.arcs.length} arcs"

  if options[:dont_parse_noded_reads]
    graph = bio_velvet_graph
  else
    log.info "Beginning parse of graph using velvet's parsing C code.."
    read_probing_graph = Bio::Velvet::Underground::Graph.parse_from_file File.join(velvet_result.result_directory, 'LastGraph')
    log.info "Completed velvet code parsing velvet graph"

    # Make the two graphs into a hybrid one
    graph = Bio::FinishM::HybridGraph.new(bio_velvet_graph, read_probing_graph)
  end
  finishm_graph.graph = graph

  # Find the anchor nodes again
  anchor_sequence_ids = probe_read_ids.to_a.sort
  endings = []
  unless probe_read_ids.empty? && options[:probe_read_names].nil? #don't bother trying to find probes if none exists
    # Convert read names to read IDs if required
    if options[:probe_read_names]
      # Probe reads are given as names, not IDs. What are the corresponding probes then?
      entries = Bio::Velvet::CnyUnifiedSeqNamesFile.extract_entries_using_grep_hack(
        File.join(velvet_result.result_directory, 'CnyUnifiedSeq.names'),
        options[:probe_read_names]
      )
      anchor_sequence_ids = []
      double_counts = 0
      options[:probe_read_names].each do |name| #maintain order of them as they are specified in the original array parameter
        if entries[name].empty?
          raise "Unable to find probe `#{name}' in the probe reads file - was it included in the assembly?"
        elsif entries[name].length > 2
          raise "Found >2 sequences named #{name} in the assembly, being conservative and not continuing"
        else
          entries[name].each do |res|
            anchor_sequence_ids.push res.read_id
          end
          if entries[name].length == 2
            double_counts += 1
            log.debug "Found 2 sequences for #{name}" if log.debug?
          end
        end
      end
      if double_counts > 0
        log.info "#{double_counts} reads were found twice (likely as pairs), including both as probes"
      end
      log.info "Recovered #{anchor_sequence_ids.length} sequences using their names" if log.info?
    end


    # Parse the read to node structure
    log.info "Reading ReadToNode.bin file.." if log.info?
    finishm_graph.read_to_nodes = Bio::FinishM::ReadToNode.new(File.join(velvet_result.result_directory, 'ReadToNode.bin'))

    finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
    log.info "Finding probe nodes in the assembly"
    c_graph_endings = finder.find_probes_from_read_to_node(finishm_graph.graph, finishm_graph.read_to_nodes, anchor_sequence_ids)
    log.debug "Converting probe nodes found in C graph to Ruby analogues and adding to Ruby-parsed graph"
    endings = c_graph_endings.collect do |node_direction_read|
      if node_direction_read.empty?
        # No probe found
        []
      else #found a node.
        #equivalent node
        node = graph.nodes[node_direction_read[0].node_id]
        #equivalent direction
        direction = node_direction_read[1]
        #equivalent noded read, copied field by field from the C-parsed one
        cnr = node_direction_read[2]
        nr = Bio::Velvet::Graph::NodedRead.new
        nr.read_id = cnr.read_id
        nr.offset_from_start_of_node = cnr.offset_from_start_of_node
        nr.start_coord = cnr.start_coord
        nr.direction = direction
        # collect
        [node, direction, nr]
      end
    end
  end
  # Probes that were not found contribute nil to each of these lists
  finishm_graph.probe_nodes = endings.collect{|array| array[0]}
  finishm_graph.probe_node_directions = endings.collect{|array| array[1]}
  finishm_graph.probe_node_reads = endings.collect{|array| array[2]}

  # Check to make sure the probe sequences map to nodes in the graph
  if finishm_graph.completely_probed?
    if log.info?
      found_all = true
      num_found = 0
      finishm_graph.probe_nodes.each_with_index do |probe,i|
        if probe.nil?
          found_all = false
          log.debug "Unable to recover probe ##{i+1}, perhaps this will cause problems, but proceding optimistically"
        else
          num_found += 1
        end
      end
      if found_all
        if finishm_graph.probe_nodes.empty?
          log.debug "No probes specified, so didn't find any"
        else
          log.info "Found all anchoring nodes in the graph."
        end
      else
        log.info "Found #{num_found} of #{finishm_graph.probe_nodes.length} anchoring nodes in the graph, ignoring the rest"
      end
    end
  else
    raise "Unable to find all anchor reads from the assembly, cannot continue. This is probably an error with this script, not you. Probes not found: #{finishm_graph.missing_probe_indices.inspect}"
  end

  if options[:post_assembly_coverage_cutoff]
    log.info "Removing nodes with coverage < #{options[:post_assembly_coverage_cutoff] } from graph.."
    original_num_nodes = graph.nodes.length
    original_num_arcs = graph.arcs.length
    filter = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
    filter.remove_low_coverage_nodes(graph,
      options[:post_assembly_coverage_cutoff],
      :whitelisted_sequences => Set.new(anchor_sequence_ids)
    )
    log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
  end

  if options[:remove_unconnected_nodes]
    if options[:graph_search_leash_length]
      log.info "Removing nodes unconnected to probe nodes from the graph using leash #{options[:graph_search_leash_length] }.."
    else
      log.info "Removing nodes unconnected to probe nodes from the graph without using a leash.."
    end
    original_num_nodes = graph.nodes.length
    original_num_arcs = graph.arcs.length
    filter = Bio::AssemblyGraphAlgorithms::ConnectivityBasedGraphFilter.new
    filter.remove_unconnected_nodes(
      graph,
      finishm_graph.probe_nodes.reject{|n| n.nil?},
      :leash_length => options[:graph_search_leash_length]
    )
    log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs, leaving #{graph.nodes.length} nodes and #{graph.arcs.length} arcs."
  end

  return finishm_graph
end
|
|
287
|
+
|
|
288
|
+
# Read in the reads from a velvet result.
#
# velvet_result_directory:: directory that holds velvet's 'CnyUnifiedSeq' file
#
# Returns the Bio::Velvet::Underground::BinarySequenceStore opened on that file.
def parse_velvet_binary_reads(velvet_result_directory)
  binary_sequences_path = File.join(velvet_result_directory, 'CnyUnifiedSeq')
  log.info "Reading in the actual sequences of all reads from #{binary_sequences_path}"
  Bio::Velvet::Underground::BinarySequenceStore.new(binary_sequences_path).tap do |store|
    log.info "Read in #{store.length} sequences"
  end
end
|
|
296
|
+
|
|
297
|
+
# When re-using an assembly, sometimes need to make
# sure that the probe sequences used previously are the same
# as what is given this time. Given an Array of probe sequences
# and a sequence store, check that the probe sequences are
# consistent with the stored ones.
#
# probe_sequences:: Array of probe sequence Strings, or nil to skip the check
# sequence_store:: object indexed by 1-based sequence number via +[]+, where
#   probe number N is expected at index N
#
# Returns true when every probe matches its stored sequence (ignoring case),
# or when probe_sequences is nil. Returns false, after logging an error, at
# the first probe that differs or is missing from the store.
def check_probe_sequences(probe_sequences, sequence_store)
  return true if probe_sequences.nil?

  probe_sequences.each_with_index do |probe, i|
    log.debug "Checking probe sequence \##{i+1}" if log.debug?
    stored = sequence_store[i+1]
    # A sequence absent from the store counts as changed rather than crashing on nil
    if stored.nil? || stored.upcase != probe.upcase
      log.error "Probe sequence \##{i+1} has changed - perhaps the wrong velvet assembly directory was specified, or a fresh assembly is required?"
      return false
    end
  end
  log.debug "Presence of #{probe_sequences.length} probe sequences verified"
  return true
end
|
|
315
|
+
end
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
require 'ds'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
class Bio::AssemblyGraphAlgorithms::HeightFinder
|
|
5
|
+
include Bio::FinishM::Logging
|
|
6
|
+
|
|
7
|
+
# Visit nodes in range and determine heights.
#
# The height of a node with no (in-range) neighbours is 0; otherwise it is one
# more than the greatest height among its neighbours whose height is known.
# A depth-first search is used, in which each node is visited once on the way
# out and once again ("retraced") after its neighbours have been explored.
#
# graph:: passed to +next_neighbours+ of each oriented node
# initial_nodes:: oriented nodes to start the search from
# options:
# :range: collection of nodes; oriented nodes whose +node+ is not in it are ignored
# :reverse: if true, start from the +reverse+ of each initial oriented node
#
# Returns two values: an Array indexed by height holding the traversal nodes
# found at that height, and an Array of cycles, each being an Array of
# oriented nodes.
def traverse(graph, initial_nodes, options={})
  by_height = []
  traversal_nodes = {}
  cycles = {}
  nodes_in_retrace_phase = Set.new

  # depth-first so stack
  stack = DS::Stack.new
  initial_nodes.each do |onode|
    next if options[:range] && options[:range].none?{|other| other == onode.node }
    traversal_node = CyclicTraversalNode.new
    traversal_node.onode = options[:reverse] ? onode.reverse : onode
    traversal_node.nodes_in = []
    traversal_nodes[traversal_node.onode.to_settable] = traversal_node
    stack.push traversal_node
  end

  while (traversal_node = stack.pop)
    settable = traversal_node.onode.to_settable

    log.debug "visiting #{traversal_node.describe}." if log.debug?

    # Consider node solved if height is known.
    unless traversal_node.height.nil?
      log.debug "Height of #{traversal_node.describe} is known. Skip." if log.debug?
      next
    end

    # find neighbours
    neighbours = traversal_node.nodes_out
    if neighbours.nil?
      neighbours = traversal_node.onode.next_neighbours(graph)
      if options[:range]
        neighbours.reject!{|onode| options[:range].none?{|other| other == onode.node}} #not in defined range
      end

      # Get or create traversal version of node
      neighbours = neighbours.collect do |onode|
        nbr_settable = onode.to_settable
        traversal_nbr = traversal_nodes[nbr_settable]
        if traversal_nbr.nil?
          traversal_nbr = CyclicTraversalNode.new
          traversal_nbr.onode = onode
          traversal_nbr.nodes_in = []
          traversal_nodes[nbr_settable] = traversal_nbr
        end
        traversal_nbr
      end

      #remember neighbours
      traversal_node.nodes_out = neighbours
    end


    # Can we solve the node?
    if neighbours.empty? #check for a tip
      log.debug "#{traversal_node.describe} is a tip." if log.debug?
      traversal_node.height = 0
      (by_height[0] ||= []).push(traversal_node)
      log.debug "Found height '0' for #{traversal_node.describe}." if log.debug?
      next
    end

    if nodes_in_retrace_phase.include? settable
      log.debug "Retracing back to #{traversal_node.describe}." if log.debug?

      # Neighbours should have been explored
      # Are neighbours involved in cycles?
      cyclic_neighbours = neighbours.reject{|node| node.cycles.nil?}
      unless cyclic_neighbours.empty?
        # current node is in a cycle if a neighbour is in an unclosed cycle
        log.debug "Found cyclic neighbours #{cyclic_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        cyclic_neighbours.each do |node|
          # Iterate over a snapshot: when a node is its own neighbour, merging
          # appends to the very array being walked.
          node.cycles.dup.each do |cycle|
            log.debug "Merging cycle #{cycle.onodes.collect{|onode| onode.to_shorthand}.join(',')}." if log.debug?
            new_cycle = traversal_node.merge_unclosed_cycle cycle.copy
            if !new_cycle.nil? && new_cycle.closed?
              log.debug "Cycle completes at #{traversal_node.describe}." if log.debug?
              new_cycle_key = new_cycle.to_settable
              if cycles.has_key? new_cycle_key
                log.debug "Already seen this cycle." if log.debug?
              else
                cycles[new_cycle_key] = new_cycle.onodes
              end
            end
          end
        end
      end

      # Unsolved neighbours imply a closed cyclic path.
      # Are neighbours unsolved?
      solved_neighbours = neighbours.reject{|node| node.height.nil?}
      unless solved_neighbours.empty?
        log.debug "We know the heights of neighbours #{solved_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        # Compute height from solved neighbours
        height = solved_neighbours.map{|node| node.height}.max + 1
        log.debug "Found height '#{height}' for #{traversal_node.describe}." if log.debug?
        traversal_node.height = height
        (by_height[height] ||= []).push(traversal_node)
      end
      # If no solved neighbours, leave unsolved

      # Move out of retrace phase
      nodes_in_retrace_phase.delete settable
      log.debug "Finished retracing #{traversal_node.describe}." if log.debug?
      next
    end

    # Move current node to retrace phase, before checking for retracing neighbours in case is own neighbour
    nodes_in_retrace_phase << settable

    # Look for currently retracing neighbours and initiate cycles
    retracing_neighbours = neighbours.select{|node| nodes_in_retrace_phase.include? node.onode.to_settable}
    unless retracing_neighbours.empty?
      log.debug "Initiating cycles for neighbours #{retracing_neighbours.collect{|node| node.describe}.join(',')} currently retracing." if log.debug?
      # initiate cycles for each retracing neighbour
      retracing_neighbours.each{|node| traversal_node.initiate_cycle(node.onode)}
    end

    # Return node to the stack and push neighbours
    stack.push traversal_node
    log.debug "Pushing #{traversal_node.describe} in retrace mode." if log.debug?
    neighbours.each do |node|
      node_settable = node.onode.to_settable

      # Note the current node as a parent of the neighbour, unless already
      # known. The comparison is against the current node, not the neighbour.
      nodes_in = node.nodes_in
      unless nodes_in.any?{|nbr| nbr.onode == traversal_node.onode}
        nodes_in.push traversal_node
      end

      if nodes_in_retrace_phase.include? node_settable
        # A currently retracing neighbour implies a cycle, cut it off here
        log.debug "Neighbour #{node.describe} is retracing. Not revisiting." if log.debug?
      else
        log.debug "Pushing neighbour #{node.describe}." if log.debug?
        stack.push node
      end
    end
  end
  return by_height, cycles.values
end
|
|
161
|
+
|
|
162
|
+
# Book-keeping record for one oriented node met during a traversal.
#
# onode:: the wrapped oriented node
# height:: height assigned to the node, nil until it is known
# nodes_in:: traversal nodes recorded as parents of this node
# nodes_out:: traversal nodes for the neighbours of this node
class TraversalNode
  attr_accessor :onode, :height, :nodes_in, :nodes_out

  # ID of the node underlying the wrapped oriented node.
  def node_id
    onode.node_id
  end

  # Shorthand description of the wrapped oriented node, used in log messages.
  def describe
    onode.to_shorthand
  end
end
|
|
173
|
+
|
|
174
|
+
# A TraversalNode that additionally records the cycles passing through it.
class CyclicTraversalNode < TraversalNode
  # Array of CyclePath objects merged into this node, nil until the first merge.
  attr_accessor :cycles

  # Start a new cycle that ends at the given oriented node, and merge it
  # into this node. Returns the result of merge_unclosed_cycle.
  def initiate_cycle(onode)
    cycle = CyclePath.new
    cycle.onodes = [onode]
    merge_unclosed_cycle cycle
  end

  # Extend an unclosed cycle through this node and remember it.
  #
  # If the last oriented node of the cycle is this node's own, the cycle is
  # marked closed; otherwise this node's oriented node is put at the front.
  # The given cycle object is modified in place.
  #
  # Returns the cycle, or nil (doing nothing) if it was already closed.
  def merge_unclosed_cycle(cycle)
    return if cycle.closed?

    if cycle.onodes.last == @onode
      cycle.closed = true
    else
      cycle.onodes.unshift @onode
    end
    (@cycles ||= []).push(cycle)
    cycle
  end

  # An ordered list of oriented nodes forming a (possibly still open) cycle.
  class CyclePath
    attr_accessor :onodes, :closed

    # True only once the cycle has been explicitly marked closed.
    def closed?
      @closed == true
    end

    # Return a new CyclePath with its own copy of the node list.
    def copy
      cycle = CyclePath.new
      cycle.onodes = @onodes.dup
      cycle.closed = @closed
      cycle
    end

    # Return the sorted list of onode settables, so that the same cycle found
    # from different starting points yields the same key. Sorting is by the
    # first element, then with START_IS_FIRST orientations first.
    def to_settable
      start_is_first = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode::START_IS_FIRST
      @onodes.collect { |onode| onode.to_settable }.sort do |a, b|
        result = a[0] <=> b[0]
        if result == 0
          # Equal orientations compare as equal, keeping the comparator consistent
          if a[1] == b[1]
            result = 0
          else
            result = a[1] == start_is_first ? -1 : 1
          end
        end
        result
      end
    end
  end
end
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
# maximum paths
#
# Count paths from the roots of the traversed graph down to its tips.
#
# by_height:: Array indexed by height, each entry an Array of traversal nodes
#   (responding to +onode+, +nodes_in+, +nodes_out+ and +describe+)
#
# Each tip counts as 1 path; every other node counts the sum over its
# neighbours that already have a count. Returns the sum over all nodes that
# have no recorded parents, or nil if there are no such nodes.
def max_paths_through(by_height)
  paths_from = {}

  by_height.each_with_index do |nodes, level|
    log.debug "At height #{level}." if log.debug?

    nodes.each do |node|
      if level == 0 # tips
        log.debug "Counted maximum of 1 path to #{node.describe}." if log.debug?
        paths_from[node.onode.to_settable] = 1
      else
        key = node.onode.to_settable
        neighbour_counts = node.nodes_out.collect { |nbr| paths_from[nbr.onode.to_settable] }.compact
        log.debug "Found neighbours of #{node.describe} with maximum paths #{neighbour_counts.join(',')}." if log.debug?
        paths_from[key] = neighbour_counts.reduce(:+)
        log.debug "Counted maximum of #{paths_from[key]} paths to #{node.describe}." if log.debug?
      end
    end
  end

  # Get the graph roots (which are nodes with no parents) and add the counts for each to get graph total
  roots = by_height.flatten.select { |node| node.nodes_in.empty? }
  root_keys = roots.collect { |node| node.onode.to_settable }
  log.debug "Found graph roots #{root_keys.collect{|settable| settable[0]}.join(',')} with maximum paths #{root_keys.collect{|key| paths_from[key]}.join(',')}." if log.debug?
  max_paths = root_keys.map { |key| paths_from[key] }.reduce(:+)
  log.debug "Counted maximum of #{max_paths} through graph." if log.debug?
  max_paths
end
|
|
255
|
+
|
|
256
|
+
# minimum paths
#
# Walk up the heights keeping a set of "live" nodes: every node becomes live
# at its own height, and above height 0 the neighbours (+nodes_out+) of the
# nodes at the current height stop being live.
#
# by_height:: Array indexed by height, each entry an Array of traversal nodes
#   (responding to +onode+, +nodes_out+ and +describe+)
#
# Returns the largest number of live nodes seen after processing any height
# (0 for an empty by_height).
def min_paths_through(by_height)
  alive = Set.new
  most_alive = 0

  by_height.each_with_index do |nodes, level|
    log.debug "At height #{level}." if log.debug?

    # nodes at current level become live
    nodes.each do |node|
      key = node.onode.to_settable
      log.debug "Setting #{node.describe} as live." if log.debug?
      alive.add(key)
    end

    unless level <= 0
      #children of nodes at current level are no longer live
      nodes.each do |node|
        node.nodes_out.each do |nbr|
          log.debug "Setting child #{nbr.describe} of live node #{node.describe} as inactive." if log.debug?
          alive.delete(nbr.onode.to_settable)
        end
      end
    end

    log.debug "There are currently #{alive.length} nodes alive. Max is #{most_alive}." if log.debug?
    next unless alive.length > most_alive

    #track the maximum live nodes at any level
    log.debug "Updating max to #{alive.length}." if log.debug?
    most_alive = alive.length
  end

  most_alive
end
|
|
288
|
+
|
|
289
|
+
# Work out oriented 'start' and 'end' nodes for a set of nodes in a graph.
#
# First, every node is considered in both orientations, and any oriented node
# that is a next neighbour of one of them is marked as reached. The oriented
# nodes never marked in this way are the entry points.
#
# Then a depth-first walk is run from the entry points, following
# next_neighbours. A visited oriented node that is itself an entry point is
# recorded as a start. Otherwise, if its reverse orientation is an entry point,
# that reverse oriented node is recorded as an end. Nodes are tracked as seen by
# node_id only, so each node is processed at most once regardless of
# orientation, and nodes not in +nodes+ are skipped.
#
# graph:: graph passed to OrientedNode#next_neighbours; also the source of
#         +nodes+ when none are given
# nodes:: nodes to consider (defaults to graph.nodes); must respond to #each and
#         #include?
#
# Returns a two element array: [start_onodes, end_onodes], each an array of
# Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode objects.
def find_oriented_edge_of_range(graph, nodes=nil)
  nodes ||= graph.nodes
  log.debug "Looking for oriented start and end points from #{nodes.collect{|n| n.node_id}.join(',')}" if log.debug?

  # Find nodes and directions which are not reachable from other nodes within range.
  # Values are the oriented node itself, or nil once it has been seen as a neighbour.
  unreached_nodes = {}
  nodes.each do |node|
    [true, false].each do |direction|
      onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node, direction
      settable = onode.to_settable
      # Do not overwrite a nil left behind by an earlier neighbour
      unreached_nodes[settable] = onode unless unreached_nodes.key? settable
      onode.next_neighbours(graph).each do |oneigh|
        unreached_nodes[oneigh.to_settable] = nil
      end
    end
  end

  entry_points = unreached_nodes.values.compact
  log.debug "Found the following nodes for a particular orientation have no paths connecting to other nodes in range: #{entry_points.collect{|n| n.to_shorthand}.join(',')}" if log.debug?

  # Start from an unreachable node, and trace all paths until the reverse end of other unreachable nodes
  # are reached, which are then defined as 'end' nodes. When finished, choose a remaining non-end unreachable
  # node and repeat, stopping paths if an already seen node is encountered.
  seen_nodes = Set.new
  start_onodes = []
  end_onodes = []
  stack = DS::Stack.new
  # Pushed in reverse so that the first entry point is the first one popped
  entry_points.reverse_each do |onode|
    stack.push onode
  end

  while (current_node = stack.pop)
    log.debug "At node #{current_node.to_shorthand}" if log.debug?

    node_id = current_node.node_id
    if seen_nodes.include?(node_id) || !nodes.include?(current_node.node)
      log.debug "Node has been seen or is out of range. Skipping..." if log.debug?
      next
    end
    seen_nodes << node_id

    current_unreached = unreached_nodes[current_node.to_settable]
    log.debug "Is current unreached? #{current_unreached}" if log.debug?
    if current_unreached
      log.debug "Defining starting node #{current_unreached.to_shorthand}" if log.debug?
      # Found start node
      start_onodes.push current_unreached
    else
      reverse_unreached = unreached_nodes[current_node.reverse.to_settable]
      log.debug "Is reverse unreached? #{reverse_unreached}" if log.debug?
      if reverse_unreached
        log.debug "Found ending node #{reverse_unreached.to_shorthand}" if log.debug?
        # Found end node
        end_onodes.push reverse_unreached
      end
    end

    current_node.next_neighbours(graph).each do |onode|
      log.debug "Adding neighbour #{onode.to_shorthand} to stack" if log.debug?
      stack.push onode
    end
  end

  return start_onodes, end_onodes
end
|
|
355
|
+
end
|