finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
Binary file
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
require 'graphviz'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
class Bio::Velvet::Graph::Node
  # Test whether this node contains any of the given kmers.
  #
  # Each kmer is looked up first in +ends_of_kmers_of_node+ and then in
  # +ends_of_kmers_of_twin_node+; iteration stops at the first hit.
  #
  # list_of_kmers:: collection of kmers to look for (iterated with +each+)
  #
  # Returns true if at least one kmer is found, false otherwise
  # (including when the list yields nothing).
  def includes_kmers?(list_of_kmers)
    list_of_kmers.each do |candidate|
      found = ends_of_kmers_of_node.include?(candidate) ||
              ends_of_kmers_of_twin_node.include?(candidate)
      return true if found
    end
    false
  end
end
|
|
12
|
+
|
|
13
|
+
module Bio
|
|
14
|
+
module Assembly
|
|
15
|
+
class ABVisualiser
|
|
16
|
+
include Bio::FinishM::Logging
|
|
17
|
+
|
|
18
|
+
# Visualise a (velvet) graph, as a graphviz object
|
|
19
|
+
#
|
|
20
|
+
# Possible options:
|
|
21
|
+
# :start_kmers: list of kmers to denote the start node(s)
|
|
22
|
+
# :end_kmers: list of kmers to denote the end node(s)
|
|
23
|
+
# :start_node_id: ID of node to mark as a start
|
|
24
|
+
# :end_node_id: ID of node to mark as an end
|
|
25
|
+
# :start_node_ids: array of node IDs to mark as a start
|
|
26
|
+
# :end_node_ids: array of node IDs to mark as an end
|
|
27
|
+
# :coverage_cutoff: ignore nodes with less coverage than this cutoff
|
|
28
|
+
# :digraph: output as a digraph (default true, else output undirected graph)
|
|
29
|
+
# :nodes: an Enumerable of nodes to be visualised.
|
|
30
|
+
# :node_id_to_nickname: add these names to the node descriptions. Hash of integer node id to String.
|
|
31
|
+
# :paired_nodes_hash: a hash of node_id to Enumerable of node_ids where there are paired-end connections
|
|
32
|
+
def graphviz(graph, options={})
|
|
33
|
+
opts = {}
|
|
34
|
+
opts[:type] = :digraph unless options[:digraph] == false
|
|
35
|
+
opts[:overlap] = :scale
|
|
36
|
+
graphviz = GraphViz.new(:G, opts)
|
|
37
|
+
|
|
38
|
+
nodes_to_explore = Set.new(options[:nodes].to_a)
|
|
39
|
+
nodes_to_explore ||= Set.new(graph.nodes)
|
|
40
|
+
|
|
41
|
+
# Add all the nodes
|
|
42
|
+
blacklisted_node_ids = Set.new
|
|
43
|
+
log.debug "Converting nodes to GraphViz format"
|
|
44
|
+
nodes_to_explore.each do |node|
|
|
45
|
+
cov = node.coverage
|
|
46
|
+
if options[:coverage_cutoff] and cov < options[:coverage_cutoff] and !cov.nil?
|
|
47
|
+
blacklisted_node_ids.add node.node_id
|
|
48
|
+
else
|
|
49
|
+
cov_string = cov.nil? ? '' : cov.round
|
|
50
|
+
label = "n#{node.node_id}_length#{node.ends_of_kmers_of_node.length}_coverage#{cov_string}"
|
|
51
|
+
if options[:node_id_to_nickname] and options[:node_id_to_nickname].key?(node.node_id)
|
|
52
|
+
label += ' ' + options[:node_id_to_nickname][node.node_id]
|
|
53
|
+
end
|
|
54
|
+
mods = {
|
|
55
|
+
:label => label,
|
|
56
|
+
}
|
|
57
|
+
includes_start = false
|
|
58
|
+
includes_end = false
|
|
59
|
+
if options[:start_kmers]
|
|
60
|
+
includes_start = node.includes_kmers?(options[:start_kmers])
|
|
61
|
+
end
|
|
62
|
+
if options[:end_kmers]
|
|
63
|
+
includes_end = node.includes_kmers?(options[:end_kmers])
|
|
64
|
+
end
|
|
65
|
+
if options[:start_node_id]
|
|
66
|
+
includes_start = true if node.node_id == options[:start_node_id]
|
|
67
|
+
end
|
|
68
|
+
if options[:end_node_id]
|
|
69
|
+
includes_end = true if node.node_id == options[:end_node_id]
|
|
70
|
+
end
|
|
71
|
+
if options[:start_node_ids]
|
|
72
|
+
includes_start = true if options[:start_node_ids].include? node.node_id
|
|
73
|
+
end
|
|
74
|
+
if options[:end_node_ids]
|
|
75
|
+
includes_end = true if options[:end_node_ids].include? node.node_id
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
if includes_start and includes_end
|
|
79
|
+
log.warn "Start and end kmers detected in the same node!"
|
|
80
|
+
elsif includes_start
|
|
81
|
+
mods[:color] = "red"
|
|
82
|
+
elsif includes_end
|
|
83
|
+
mods[:color] = "green"
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
graphviz.add_nodes node.node_id.to_s, mods
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# Add all the edges
|
|
91
|
+
arcs_of_interest = graph.arcs
|
|
92
|
+
if options[:nodes]
|
|
93
|
+
arcs_of_interest = Set.new
|
|
94
|
+
nodes_to_explore.each do |node|
|
|
95
|
+
graph.arcs.get_arcs_by_node_id(node.node_id).each do |arc|
|
|
96
|
+
arcs_of_interest << arc
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
log.info "Converting #{arcs_of_interest.length} arcs to GraphViz format"
|
|
102
|
+
arcs_of_interest.each do |arc|
|
|
103
|
+
# Add unless the node has been blacklisted
|
|
104
|
+
unless blacklisted_node_ids.include? arc.begin_node_id or
|
|
105
|
+
blacklisted_node_ids.include? arc.end_node_id or
|
|
106
|
+
!nodes_to_explore.include?(graph.nodes[arc.begin_node_id]) or
|
|
107
|
+
!nodes_to_explore.include?(graph.nodes[arc.end_node_id])
|
|
108
|
+
|
|
109
|
+
# Direction of the arrows, to denote connection to beginning of node (connects to start = in-arrow-head to node on output graph)
|
|
110
|
+
if arc.connects_end_to_beginning?(arc.begin_node_id, arc.end_node_id)
|
|
111
|
+
graphviz.add_edges arc.begin_node_id.to_s, arc.end_node_id.to_s
|
|
112
|
+
elsif arc.connects_end_to_end?(arc.begin_node_id, arc.end_node_id)
|
|
113
|
+
graphviz.add_edges arc.begin_node_id.to_s, arc.end_node_id.to_s, {:dir => "none"}
|
|
114
|
+
elsif arc.connects_beginning_to_beginning?(arc.begin_node_id, arc.end_node_id)
|
|
115
|
+
graphviz.add_edges arc.begin_node_id.to_s, arc.end_node_id.to_s, {:dir => "both"}
|
|
116
|
+
elsif arc.connects_beginning_to_end?(arc.begin_node_id, arc.end_node_id)
|
|
117
|
+
graphviz.add_edges arc.end_node_id.to_s, arc.begin_node_id.to_s
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Add paired_nodes_hash pairs
|
|
123
|
+
unless options[:paired_nodes_hash].nil?
|
|
124
|
+
# Create a list of arc node pairs for len calculation
|
|
125
|
+
arc_pairs = arcs_of_interest.collect do |arc|
|
|
126
|
+
[arc.begin_node_id, arc.end_node_id].sort
|
|
127
|
+
end
|
|
128
|
+
directly_connected_node_pairs = Set.new(arc_pairs)
|
|
129
|
+
|
|
130
|
+
# Keep track of pairs so multiple arcs are not drawn e.g. node1 => node2 and node2=>node1
|
|
131
|
+
pairs_added = Set.new
|
|
132
|
+
log.info "Adding paired-end linkages to GraphViz format.."
|
|
133
|
+
options[:paired_nodes_hash].each do |node1_id, connected_node_ids|
|
|
134
|
+
connected_node_ids.each do |node2_id|
|
|
135
|
+
next if node1_id == node2_id #skip within-node connections
|
|
136
|
+
sorted = [node1_id, node2_id].sort #sort so only a single connection is shown
|
|
137
|
+
unless pairs_added.include?(sorted) or
|
|
138
|
+
!nodes_to_explore.include?(graph.nodes[node1_id]) or
|
|
139
|
+
!nodes_to_explore.include?(graph.nodes[node2_id]) or
|
|
140
|
+
directly_connected_node_pairs.include?([node1_id, node2_id].sort)
|
|
141
|
+
|
|
142
|
+
graphviz.add_edges sorted[0].to_s, sorted[1].to_s, {:color => "grey", :dir => "none", :style => 'dashed'}
|
|
143
|
+
pairs_added << sorted
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
return graphviz
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Placeholder for a simplified representation of a velvet graph.
class SimplifiedGraph
  # Not implemented yet: raises "not implemented" as soon as the graph
  # contains at least one node, and returns nil for a graph without nodes.
  def self.create_from_velvet_graph(graph)
    incorporated_count = 0

    # Keep going until every node of the graph has been incorporated
    until incorporated_count >= graph.nodes.length
      raise "not implemented"
    end
  end

  # A class representing a linear string of nodes without forks
  class Path
  end
end
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
require 'ds'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
module Bio
|
|
5
|
+
module AssemblyGraphAlgorithms
|
|
6
|
+
|
|
7
|
+
# A collection of trails together with two flags: whether circular paths
# were detected and whether the maximum number of paths was exceeded.
# Both flags start out false; trails is nil until it is assigned.
class TrailSet
  include Enumerable

  attr_accessor :trails, :circular_paths_detected, :max_path_limit_exceeded

  def initialize
    @max_path_limit_exceeded = false
    @circular_paths_detected = false
  end

  # Yield each trail in turn. Does nothing (and returns nil) when no
  # trails have been assigned.
  def each
    return nil if @trails.nil?

    @trails.each { |trail| yield trail }
  end
end
|
|
26
|
+
|
|
27
|
+
class AcyclicConnectionFinder
|
|
28
|
+
include Bio::FinishM::Logging
|
|
29
|
+
|
|
30
|
+
# Find trails between two oriented nodes, both facing the same way along the path.
# The work is delegated to
# SingleCoherentPathsBetweenNodesFinder#find_all_connections_between_two_nodes,
# whose return value is returned unchanged.
#
# Options:
# * :recoherence_kmer: use a longer kmer to help de-bubble and de-circularise (default don't use this)
# * :sequences: Bio::Velvet::Sequence object holding sequences of nodes within leash length
#
# The full options hash is also handed on to the finder.
def find_trails_between_nodes(graph, initial_oriented_node, terminal_oriented_node, leash_length, options={})
  #TODO: this is now implemented in the finishm_graph object - just get it from there
  # The search starts from a trail holding only the initial node
  seed_trail = Bio::Velvet::Graph::OrientedNodeTrail.new
  seed_trail.add_oriented_node initial_oriented_node

  connection_finder = Bio::AssemblyGraphAlgorithms::SingleCoherentPathsBetweenNodesFinder.new
  connection_finder.find_all_connections_between_two_nodes(
    graph,
    seed_trail,
    terminal_oriented_node,
    leash_length,
    options[:recoherence_kmer],
    options[:sequences],
    options
  )
end
|
|
46
|
+
|
|
47
|
+
# Algorithms like SingleCoherentWanderer#wander give an overly short
# base pair distance between two probes, because the length of the node
# containing the probe at either end is not included in the calculation.
#
# Return the calibrated distance i.e. the true base pair distance between
# the start of each node pair. Returned is the given distance plus the
# distance between the start of each probe and the end of the containing
# node.
#
# finishm_graph:: object providing probe_node_reads, probe_nodes and graph.hash_length
# probe1_index, probe2_index:: indices into probe_node_reads / probe_nodes
# distance:: the uncalibrated distance. It is not used when both probes
#            are found on the same node.
#
# Raises a RuntimeError when both probes are on the same node but their
# reads do not have opposite directions.
def calibrate_distance_accounting_for_probes(finishm_graph, probe1_index, probe2_index, distance)
  read1 = finishm_graph.probe_node_reads[probe1_index]
  read2 = finishm_graph.probe_node_reads[probe2_index]
  probe_node1 = finishm_graph.probe_nodes[probe1_index]
  probe_node2 = finishm_graph.probe_nodes[probe2_index]

  # If the start and end nodes are the same, that's a special case:
  if probe_node1.node_id == probe_node2.node_id
    opposite_directions =
      (read1.direction == true && read2.direction == false) ||
      (read1.direction == false && read2.direction == true)
    unless opposite_directions
      raise "Programming error: to connect within a single contig two probes must have opposite directions: found #{read1.direction} and #{read2.direction}"
    end

    return probe_node1.length - read1.offset_from_start_of_node - read2.offset_from_start_of_node - finishm_graph.graph.hash_length
  end

  # Usual case - start and end nodes are different nodes
  to_return = distance
  # add the first probe side
  to_return += probe_node1.length - read1.offset_from_start_of_node - finishm_graph.graph.hash_length
  # add the second probe side, measured with the second probe's own read
  # (this previously used read1's offset by mistake)
  to_return += probe_node2.length - read2.offset_from_start_of_node - finishm_graph.graph.hash_length
  return to_return
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
require 'ds'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
module Bio
|
|
5
|
+
module AssemblyGraphAlgorithms
|
|
6
|
+
class AllOrfsFinder
|
|
7
|
+
include Bio::FinishM::Logging
|
|
8
|
+
|
|
9
|
+
# Number of nucleotides in a codon
CODON_LENGTH = 3
# Codons treated as the start of an open reading frame
START_CODONS = ['ATG'].freeze
# Codons treated as the end of an open reading frame
STOP_CODONS = ['TAG', 'TAA', 'TGA'].freeze
|
|
12
|
+
|
|
13
|
+
# Search for open reading frames in a graph, in all the paths beginning at a set of
# nodes through a graph (or a subset defined by range).
#
# graph:: the graph to search
# initial_paths:: the paths the search starts from
# minimum_orf_length:: passed on to find_orfs_from_problems as :minimum_orf_length
# range:: passed on to find_all_problems as :range
# max_gapfill_paths:: passed on to find_orfs_from_problems as :max_gapfill_paths
# max_cycles:: passed on to find_orfs_from_problems as :max_cycles
#
# Returns whatever find_orfs_from_problems returns.
def find_orfs_in_graph(graph, initial_paths, minimum_orf_length=nil,
    range=nil, max_gapfill_paths=nil, max_cycles=nil)

  problems = find_all_problems(graph,
    initial_paths,
    :range => range
    )

  # find_orfs_from_problems reads :minimum_orf_length; the key used here
  # before (:min_orf_length) was never read, so the minimum was ignored.
  find_orfs_from_problems(problems, {
    :minimum_orf_length => minimum_orf_length,
    :max_gapfill_paths => max_gapfill_paths,
    :max_cycles => max_cycles,
    })
end
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Pop paths from an AllProblemTrailsFinder seeded with initial_paths and
# file each one in a ProblemSet under the key given by path_to_settable.
#
# A path whose key has been seen before is only appended to that
# problem's known_paths. A path with a new key starts a new
# DynamicProgrammingProblem; its key is recorded in terminal_node_keys when
# the path has no neighbours left (after restricting them to
# options[:range], if given), otherwise the finder is asked to push the
# path's next neighbours.
#
# Options:
# * :range: collection of node IDs; neighbours whose node_id is not included are disregarded
#
# Returns the ProblemSet.
def find_all_problems(graph, initial_paths, options={})
  solved = SingleCoherentPathsBetweenNodesFinder::ProblemSet.new
  trail_queue = AllProblemTrailsFinder.new(graph, initial_paths)
  allowed_ids = options[:range]

  while (candidate = trail_queue.pop)
    log.debug "considering #{candidate}" if log.debug?
    key = path_to_settable(candidate)

    if solved.has_key?(key)
      log.debug "Already seen this problem" if log.debug?
      solved[key].known_paths.push candidate
    else
      log.debug "New dynamic problem being solved" if log.debug?
      # new problem being solved here
      fresh_problem = SingleCoherentPathsBetweenNodesFinder::DynamicProgrammingProblem.new
      fresh_problem.known_paths.push candidate.copy
      solved[key] = fresh_problem

      onward = candidate.neighbours_of_last_node(graph)
      onward.select! { |onode| allowed_ids.include? onode.node_id } if allowed_ids

      if onward.empty?
        log.debug "last is terminal" if log.debug?

        solved.terminal_node_keys ||= Set.new
        solved.terminal_node_keys << key
      else
        # explore the forward neighbours
        trail_queue.push_next_neighbours candidate
        log.debug "Priority queue size: #{trail_queue.size}" if log.debug?
      end
    end
  end

  return solved
end
|
|
71
|
+
|
|
72
|
+
# Build the key used to file a path in the problem set, by delegating to
# SingleCoherentPathsBetweenNodesFinder#path_to_settable. The second
# argument handed over is the length of the path's last node plus
# CODON_LENGTH - 1.
def path_to_settable(path)
  last_node_length = path.last.node.length_alone
  reach = last_node_length + CODON_LENGTH - 1
  SingleCoherentPathsBetweenNodesFinder.new.path_to_settable(path, reach)
end
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Walk backwards from each terminal problem in the given problem set,
# rebuilding whole paths one node at a time, and record the start and stop
# codon markers and start/stop pairs found along each path.
#
# Each stack entry is a pair [first_part, second_part]. first_part is the
# part of the path still to be traced back, and second_part is an
# ORFsTracingTrail holding the nodes traced so far plus the forward and
# twin ORF results collected for them.
#
# Options:
# * :max_gapfill_paths: give up (returning no trails and setting
#   max_path_limit_exceeded) once the main stack plus the finished paths
#   exceed this number (default 2196)
# * :max_cycles: maximum number of repeated cycles tolerated in a path (default 1)
# * :minimum_orf_length: minimum ORF length handed to
#   orfs_from_start_stop_markers (default 0). The key :min_orf_length is
#   accepted as an alias.
#
# Returns a TrailSet whose trails are the ORFsTracingTrail objects of the
# completed paths.
def find_orfs_from_problems(problems, options={})
  max_num_paths = options[:max_gapfill_paths]
  max_num_paths ||= 2196
  max_cycles = options[:max_cycles] || 1
  # Callers have used both spellings of this key; honour either one.
  min_orf_length = options[:minimum_orf_length] || options[:min_orf_length] || 0

  counter = SingleCoherentPathsBetweenNodesFinder::CycleCounter.new(max_cycles)
  # Decide which stack an entry goes to: true (secondary stack) when the
  # combined path repeats cycles more than max_cycles times, else false.
  decide_stack = lambda do |to_push|
    part_nodes = [to_push[0].trail, to_push[1].otrail ? to_push[1].otrail.trail : []]
    if max_cycles < counter.path_cycle_count(part_nodes.flatten)
      log.debug "Pushing #{part_nodes.collect{|part| part.collect{|onode| onode.node.node_id}.join(',')}.join(' and ') } to secondary stack" if log.debug?
      true
    else
      log.debug "Pushing #{part_nodes.collect{|part| part.collect{|onode| onode.node.node_id}.join(',')}.join(' and ') } to main stack" if log.debug?
      false
    end
  end

  stack = SingleCoherentPathsBetweenNodesFinder::DualStack.new(&decide_stack)
  to_return = Bio::AssemblyGraphAlgorithms::TrailSet.new

  # if there is no solutions to the overall problem then there is no solution at all
  if problems.terminal_node_keys.nil? || problems.terminal_node_keys.empty?
    to_return.trails = []
    return to_return
  end

  # push all "ending in the final node" solutions to the stack
  problems.terminal_node_keys.each do |key|
    overall_solution = problems[key]
    first_part = overall_solution.known_paths[0].copy
    second_part = ORFsTracingTrail.new
    second_part.otrail = Bio::Velvet::Graph::OrientedNodeTrail.new
    stack.push [first_part, second_part]
  end

  all_paths_hash = {}
  while path_parts = stack.pop
    first_part = path_parts[0]
    second_part = path_parts[1]
    log.debug "#{first_part.to_shorthand} and #{second_part.otrail.to_shorthand}" if log.debug?

    # Look for codons
    log.debug "Searching for codons in first node of second part" if log.debug?
    fwd_result, twin_result = search_for_codons(second_part.otrail) # search from start of second part

    # Forward direction
    if !fwd_result.stop_markers.empty? || !fwd_result.start_markers.empty?
      # the newly searched node is at the front of the trail, so trail
      # positions equal node positions
      [fwd_result.stop_markers, fwd_result.start_markers].each do |markers|
        markers.each do |marker|
          marker.position_in_trail = marker.position_in_node
        end
      end
      current_fwd_stops = []
      current_fwd_starts = []
      if second_part.fwd_orfs_result
        current_fwd_stops.concat second_part.fwd_orfs_result.initial_stop_markers
        current_fwd_starts.concat second_part.fwd_orfs_result.initial_start_markers
        current_fwd_starts.concat second_part.fwd_orfs_result.final_start_markers
      end
      current_fwd_stops.concat fwd_result.stop_markers
      current_fwd_starts.concat fwd_result.start_markers
      log.debug "Attempt to pair start codons at #{current_fwd_starts.collect{|m| m.position_in_trail}.join(',')} with stop codons at #{current_fwd_stops.collect{|m| m.position_in_trail}.join(',')}" if log.debug?
      fwd_orfs_result = orfs_from_start_stop_markers(current_fwd_starts, current_fwd_stops, min_orf_length)
      log.debug "Found pairs #{fwd_orfs_result.start_stop_pairs.collect{|pair| pair.collect{|m| m.position_in_trail}.join(',')}.join('],[')}" if log.debug?

      # collect previous start-stop pairs
      if second_part.fwd_orfs_result
        fwd_orfs_result.start_stop_pairs.concat second_part.fwd_orfs_result.start_stop_pairs
      end
      second_part.fwd_orfs_result = fwd_orfs_result
      log.debug "Remaining forward stops: #{second_part.fwd_orfs_result.initial_stop_markers.collect{|m| m.position_in_trail}.join(',')}" if log.debug?
    end

    # Reverse direction
    if !twin_result.stop_markers.empty? || !twin_result.start_markers.empty?
      # twin stop positions are relative to start of first path twin node
      # add length of rest of path to get position relative to start of last path twin node
      length_of_rest_of_path = second_part.otrail.length_in_bp_within_path - second_part.otrail[0].node.length_alone
      [twin_result.stop_markers, twin_result.start_markers].each do |markers|
        markers.each do |marker|
          marker.position_in_trail = marker.position_in_node + length_of_rest_of_path
        end
      end
      current_twin_stops = []
      current_twin_starts = []
      if second_part.twin_orfs_result
        current_twin_stops.concat second_part.twin_orfs_result.initial_stop_markers
        current_twin_starts.concat second_part.twin_orfs_result.initial_start_markers
        current_twin_starts.concat second_part.twin_orfs_result.final_start_markers
      end
      current_twin_stops.concat twin_result.stop_markers
      current_twin_starts.concat twin_result.start_markers
      log.debug "Attempt to pair stop codons in reverse direction at #{current_twin_stops.collect{|m| m.position_in_trail}.join(',')} with starts at #{current_twin_starts.collect{|m| m.position_in_trail}.join(',')}" if log.debug?
      twin_orfs_result = orfs_from_start_stop_markers(current_twin_starts, current_twin_stops, min_orf_length)
      log.debug "Found pairs #{twin_orfs_result.start_stop_pairs.collect{|pair| pair.collect{|m| m.position_in_trail}.join(',')}.join('],[')}" if log.debug?

      # collect previous start-stop pairs
      if second_part.twin_orfs_result
        twin_orfs_result.start_stop_pairs.concat second_part.twin_orfs_result.start_stop_pairs
      end
      second_part.twin_orfs_result = twin_orfs_result
      log.debug "Remaining twin starts: #{second_part.twin_orfs_result.final_start_markers.collect{|m| m.position_in_trail}.join(',')}" if log.debug?
    end

    if first_part.length == 0
      # If we've tracked all the way to the beginning, then there's no need to track further

      key = second_part.otrail.trail.hash
      all_paths_hash[key] ||= second_part
      next
    end

    last = first_part.last
    if second_part.otrail.trail.include? last
      # NOTE(review): this debug line calls #collect on the ORFsTracingTrail
      # itself - confirm that class is Enumerable.
      log.debug "Cycle at node #{last.node_id} detected in previous path #{second_part.collect{|onode| onode.node_id}.join(',')}." if log.debug?
      to_return.circular_paths_detected = true
      if max_cycles == 0 || max_cycles < counter.path_cycle_count([last, second_part.otrail.trail].flatten)
        log.debug "Not finishing cyclic path with too many repeated cycles." if log.debug?
        next
      end
    end

    paths_to_last = problems[path_to_settable(first_part)].known_paths
    paths_to_last.each do |path|
      # Extend the traced trail by one node at the front
      new_second_part = ORFsTracingTrail.new
      new_second_part.otrail = second_part.otrail.copy
      new_second_part.otrail.trail.unshift last

      if second_part.fwd_orfs_result
        # offset positions in forward direction
        offset = last.node.length_alone
        copy_and_offset_marker = lambda do |marker|
          m = marker.copy
          m.position_in_trail += offset
          m
        end

        new_fwd_orfs_result = ORFsResult.new
        new_fwd_orfs_result.start_stop_pairs = second_part.fwd_orfs_result.start_stop_pairs.collect do |pairs|
          pairs.collect(&copy_and_offset_marker)
        end
        new_fwd_orfs_result.initial_start_markers = second_part.fwd_orfs_result.initial_start_markers.collect(&copy_and_offset_marker)
        new_fwd_orfs_result.initial_stop_markers = second_part.fwd_orfs_result.initial_stop_markers.collect(&copy_and_offset_marker)
        new_fwd_orfs_result.final_start_markers = second_part.fwd_orfs_result.final_start_markers.collect(&copy_and_offset_marker)
        new_second_part.fwd_orfs_result = new_fwd_orfs_result
      end

      if second_part.twin_orfs_result
        # twin markers are copied without any offset
        new_twin_orfs_result = ORFsResult.new
        new_twin_orfs_result.start_stop_pairs = second_part.twin_orfs_result.start_stop_pairs.collect do |pairs|
          pairs.collect{|marker| marker.copy}
        end
        new_twin_orfs_result.initial_stop_markers = second_part.twin_orfs_result.initial_stop_markers.collect{|marker| marker.copy}
        new_twin_orfs_result.final_start_markers = second_part.twin_orfs_result.final_start_markers.collect{|marker| marker.copy}
        new_second_part.twin_orfs_result = new_twin_orfs_result
      end

      new_first_part = path.copy
      new_first_part.remove_last_node

      stack.push [new_first_part, new_second_part]
    end

    # max_num_paths parachute
    # The parachute can kill the search once the main stack exceeds max_gapfill_paths,
    # since all paths on it are valid.
    if !max_num_paths.nil? && (stack.sizes[0] + all_paths_hash.length) > max_num_paths
      log.info "Exceeded the maximum number of allowable paths in this gapfill" if log.info?
      to_return.max_path_limit_exceeded = true
      all_paths_hash = {}
      break
    end
  end

  to_return.trails = all_paths_hash.values
  return to_return
end
|
|
255
|
+
|
|
256
|
+
# Search for start and stop codons in the first node of the given trail,
# and in the overlap sequence returned by get_overlap_sequences for the trail.
#
# Returns:
# SearchResult relative to start of first node
# SearchResult relative to start of first twin node
#
# Two empty SearchResult objects are returned for an empty trail.
def search_for_codons(otrail)
  return SearchResult.new, SearchResult.new if otrail.trail.empty?
  onode = otrail[0]

  make_marker = lambda do |position|
    marker = Marker.new
    marker.position_in_node = position
    marker.node = onode.node
    marker
  end

  words = Set.new(START_CODONS).merge(STOP_CODONS)

  # search within first / last node
  fwd_nodes_sequence, twin_nodes_sequence = get_sequences onode
  fwd_within_first = word_search(fwd_nodes_sequence, words, CODON_LENGTH)
  twin_within_first = word_search(twin_nodes_sequence, words, CODON_LENGTH)

  # extend search along trail
  fwd_overlap_sequence, twin_overlap_sequence = get_overlap_sequences(otrail, CODON_LENGTH)
  fwd_in_overlap = word_search(fwd_overlap_sequence, words, CODON_LENGTH)
  twin_in_overlap = word_search(twin_overlap_sequence, words, CODON_LENGTH)

  # offset positions in overlap to be relative to start of node / twin node
  offset = onode.node.length_alone
  fwd_in_overlap.each{|word, inds| fwd_in_overlap[word] = inds.collect{|pos| pos + offset}}
  twin_in_overlap.each{|word, inds| twin_in_overlap[word] = inds.collect{|pos| pos + 1 - CODON_LENGTH}}

  # A codon can be found both inside the node and in the overlap. A plain
  # Hash#merge would keep only the overlap positions for that codon, so
  # combine the two position lists instead.
  fwd_positions = fwd_within_first.merge(fwd_in_overlap) do |_word, within_first, in_overlap|
    within_first + in_overlap
  end
  twin_positions = twin_within_first.merge(twin_in_overlap) do |_word, within_first, in_overlap|
    within_first + in_overlap
  end

  # assemble result
  fwd_result = SearchResult.new
  twin_result = SearchResult.new

  # Append one marker per position recorded for the given codon
  push_marks_to_list = lambda do |list, word, positions|
    if positions.has_key? word
      list.concat positions[word].collect{|pos| make_marker.call pos}
    end
  end

  START_CODONS.each do |word|
    # fwd starts
    push_marks_to_list.call(fwd_result.start_markers, word, fwd_positions)
    # twin starts
    push_marks_to_list.call(twin_result.start_markers, word, twin_positions)
  end

  STOP_CODONS.each do |word|
    # fwd stops
    push_marks_to_list.call(fwd_result.stop_markers, word, fwd_positions)
    # twin stops
    push_marks_to_list.call(twin_result.stop_markers, word, twin_positions)
  end

  return fwd_result, twin_result
end
|
|
335
|
+
|
|
336
|
+
# Collect the sequence spanning the junction between the first node of a
# trail and the nodes that follow it.
#
# Node sequences (from get_sequences) are joined along the trail until the
# part beyond the first node is at least size - 1 long or the trail runs
# out. The joined sequence is then trimmed so that at most size - 1
# positions of the first node and at most size - 1 positions beyond it
# remain.
#
# otrail:: trail to read nodes from
# size:: word length the overlap is being collected for
# from_end:: when true, start from the last node of the trail and walk
#            towards its beginning instead
#
# Returns the forward sequence and the twin sequence, in that order.
# NOTE(review): an empty trail returns a single '' rather than a pair.
def get_overlap_sequences(otrail, size, from_end=false)
  return '' if otrail.trail.empty?

  # reverse as new sequence is taken from front of trail
  oriented_nodes = from_end ? otrail.trail.reverse : otrail.trail

  reach = size - 1
  first_length = oriented_nodes[0].node.length_alone
  extended_by = -first_length
  fwd_joined = ''
  twin_joined = ''

  oriented_nodes.each do |current|
    break unless extended_by < reach

    piece_fwd, piece_twin = get_sequences(current)
    if from_end
      twin_joined = twin_joined + piece_twin
      fwd_joined = piece_fwd + fwd_joined
    else
      twin_joined = piece_twin + twin_joined
      fwd_joined = fwd_joined + piece_fwd
    end

    extended_by += current.node.length_alone
  end

  # Number of positions to drop on the first node's side and on the far side
  near_trim = [first_length - reach, 0].max
  far_trim = [extended_by - reach, 0].max

  if from_end
    [fwd_joined[far_trim..-(near_trim + 1)], twin_joined[near_trim..-(far_trim + 1)]]
  else
    [fwd_joined[near_trim..-(far_trim + 1)], twin_joined[far_trim..-(near_trim + 1)]]
  end
end
|
|
380
|
+
|
|
381
|
+
# Fetch both sequences of an oriented node, ordered by its orientation.
#
# For a node that starts at its start the forward sequence is
# ends_of_kmers_of_node and the twin sequence is ends_of_kmers_of_twin_node;
# for any other node the two are swapped.
#
# Returns the forward sequence and the twin sequence, in that order.
def get_sequences(onode)
  node = onode.node
  if onode.starts_at_start?
    [node.ends_of_kmers_of_node, node.ends_of_kmers_of_twin_node]
  else
    [node.ends_of_kmers_of_twin_node, node.ends_of_kmers_of_node]
  end
end
|
|
391
|
+
|
|
392
|
+
# Slide a window of the given size along a sequence and record where each
# of the wanted words occurs.
#
# sequence:: String to search
# words:: collection of words to look for (must respond to include?)
# size:: length of the window
#
# Returns a Hash of word to Array of positions, in ascending order. Each
# position is the offset just past the end of the occurrence, i.e. its
# start offset plus size. Words that never occur are not in the Hash.
def word_search(sequence, words, size)
  hits = {}

  (0..(sequence.length - size)).each do |start|
    window = sequence[start, size]
    next unless words.include? window

    (hits[window] ||= []).push(start + size)
  end

  hits
end
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
# Given an Array of start markers and an Array of stop markers, work out
# which start/stop combinations form ORFs. Markers only need to respond to
# +position_in_trail+.
#
# Markers are split into three reading frames by position_in_trail % 3 and
# each frame is scanned independently, in order of increasing position:
#
# * the first stop marker of a frame is recorded in +initial_stop_markers+
# * the first start marker of a frame is recorded in +initial_start_markers+
#   when it lies before the first stop, or when the frame has no stop at all
# * each later start is paired with the first stop at or after it; the
#   [start, stop] marker pair is added to +start_stop_pairs+ when the
#   difference between the two position_in_trail values is at least
#   +minimum_orf_length+ (length measured in nucleotides)
# * a start with no stop after it is recorded in +final_start_markers+
#
# The returned object is an instance of ORFsResult.
def orfs_from_start_stop_markers(start_markers, stop_markers, minimum_orf_length)
  # Split up the start and stop markers into 3 frames
  frame_starts = [[], [], []]
  frame_stops = [[], [], []]
  start_markers.each do |marker|
    frame_starts[marker.position_in_trail % 3].push marker
  end
  stop_markers.each do |marker|
    frame_stops[marker.position_in_trail % 3].push marker
  end

  to_return = ORFsResult.new
  (0..2).each do |frame|
    # Sort arrays in descending order because Array#pop removes from the end of the array
    starts = frame_starts[frame].sort { |a, b| b.position_in_trail <=> a.position_in_trail }
    stops = frame_stops[frame].sort { |a, b| b.position_in_trail <=> a.position_in_trail }

    current_start = starts.pop
    current_stop = stops.pop

    # Record first stop codon
    to_return.initial_stop_markers.push current_stop if current_stop

    # Record first start codon before any stop codons
    if current_start && (current_stop.nil? || current_start.position_in_trail < current_stop.position_in_trail)
      to_return.initial_start_markers.push current_start
    end

    while current_start && current_stop
      # Move to next start after current stop. The comparison is <= rather
      # than < so that a start sharing its position with the current stop is
      # skipped too; otherwise neither marker would ever advance and this
      # loop would never terminate.
      while current_start && current_start.position_in_trail <= current_stop.position_in_trail
        current_start = starts.pop
      end
      # No start codons left in this frame
      break unless current_start

      # Move to next stop after current start
      while current_stop && current_stop.position_in_trail < current_start.position_in_trail
        current_stop = stops.pop
      end
      unless current_stop
        # The last reading frame is still open when the markers run out
        to_return.final_start_markers.push current_start
        break
      end

      # This stop codon stops the current reading frame.
      if current_stop.position_in_trail - current_start.position_in_trail >= minimum_orf_length
        # Found a legit ORF
        to_return.start_stop_pairs.push [current_start, current_stop]
      end
      # Whether or not last ORF was long enough, loop to search for the next start codon
    end
  end

  to_return
end
|
|
476
|
+
|
|
477
|
+
# Extract the sequence of every ORF recorded on the given trails.
#
# trails:: Enumerable of objects responding to +otrail+, +fwd_orfs_result+
#          and +twin_orfs_result+. The otrail must respond to
#          +sequences_within_path+ (returning the forward and twin
#          sequences), +trail+ (the oriented nodes) and +to_shorthand+.
#
# Marker positions are positions of the last base of a codon, so 3 is
# subtracted from a start marker's position to reach the first base of the
# start codon.
#
# Returns a Hash of name => sequence. For each direction of each trail:
#
# * every start/stop pair gives the sequence from the start codon up to the
#   stop marker position, named
#   "(first_node:start_pos),middle_nodes,(last_node:stop_pos)"
# * every initial stop marker gives the sequence from the beginning of the
#   trail up to the stop marker position, named
#   "leading_nodes,(last_node:stop_pos)"
# * every final start marker gives the sequence from the start codon to the
#   end of the trail, named "(first_node:start_pos),trailing_nodes"
# * when there is no result, or the result holds none of the above, the
#   whole sequence is stored under the trail's shorthand
#
# When a name is generated more than once, the first sequence stored wins.
def orf_sequences_from_trails(trails)
  to_return = {}
  trails.each do |trail|
    fwd_sequence, twin_sequence = trail.otrail.sequences_within_path
    # forward / twin directions
    [
      [fwd_sequence, trail.fwd_orfs_result],
      [twin_sequence, trail.twin_orfs_result]
    ].each do |sequence, result|
      if result
        # ORFs with both a start and a stop codon on the trail
        result.start_stop_pairs.each do |start_marker, stop_marker|
          start_position = start_marker.position_in_trail - 3
          end_position = stop_marker.position_in_trail

          # orf name: nodes from the one holding the start codon up to and
          # including the one holding the stop codon
          last_node = nil
          onodes = trail.otrail.trail.drop_while do |onode|
            onode.node != start_marker.node
          end.take_while do |onode|
            next false if last_node == stop_marker.node
            last_node = onode.node
            true
          end
          middle = onodes[1...-1].map { |onode| onode.to_shorthand }.join(',')
          name = "(#{onodes[0].to_shorthand}:#{start_marker.position_in_node}),#{middle},(#{onodes[-1].to_shorthand}:#{stop_marker.position_in_node})"

          to_return[name] ||= sequence[start_position...end_position]
        end

        # ORFs running from the beginning of the trail to the first stop codon
        result.initial_stop_markers.each do |marker|
          end_position = marker.position_in_trail

          # orf name: nodes up to and including the one holding the stop codon
          last_node = nil
          onodes = trail.otrail.trail.take_while do |onode|
            next false if last_node == marker.node
            last_node = onode.node
            true
          end
          leading = onodes[0...-1].map { |onode| onode.to_shorthand }.join(',')
          name = "#{leading},(#{onodes[-1].to_shorthand}:#{marker.position_in_node})"

          to_return[name] ||= sequence[0...end_position]
        end

        # ORFs that start on the trail and are still open at its end
        result.final_start_markers.each do |marker|
          start_position = marker.position_in_trail - 3

          # orf name: nodes from the one holding the start codon onwards
          onodes = trail.otrail.trail.drop_while { |onode| onode.node != marker.node }
          trailing = onodes[1..-1].map { |onode| onode.to_shorthand }.join(',')
          name = "(#{onodes[0].to_shorthand}:#{marker.position_in_node}),#{trailing}"

          # Previously the name and start position were computed but nothing
          # was stored, so these ORFs never reached the output.
          to_return[name] ||= sequence[start_position..-1]
        end
      end

      # Nothing ORF-like was recorded for this direction: keep the whole sequence
      if result.nil? || (result.start_stop_pairs.empty? && result.final_start_markers.empty? && result.initial_stop_markers.empty?)
        name = "#{trail.otrail.to_shorthand}"

        to_return[name] ||= sequence
      end
    end
  end

  to_return
end
|
|
537
|
+
|
|
538
|
+
# Location of a codon. The positions stored are those of the last base of
# the codon, both along the trail and within the node it was found in.
class Marker
  attr_accessor :position_in_trail
  attr_accessor :position_in_node
  attr_accessor :node

  # Return a new Marker carrying the same position_in_trail,
  # position_in_node and node values as this one. The values themselves
  # are shared with the original, not duplicated.
  def copy
    Marker.new.tap do |duplicate|
      duplicate.position_in_trail = @position_in_trail
      duplicate.position_in_node = @position_in_node
      duplicate.node = @node
    end
  end
end
|
|
550
|
+
|
|
551
|
+
# Container for the start and stop markers collected by a search. Both
# lists begin as empty Arrays.
class SearchResult
  attr_accessor :start_markers
  attr_accessor :stop_markers

  def initialize
    @start_markers, @stop_markers = [], []
  end
end
|
|
559
|
+
|
|
560
|
+
# A trail (+otrail+) bundled with the ORF results found along its forward
# and twin directions. Enumerating an ORFsTracingTrail enumerates the
# underlying otrail.
class ORFsTracingTrail
  include Enumerable

  attr_accessor :otrail, :fwd_orfs_result, :twin_orfs_result

  # Delegate iteration to the otrail. Yields nothing and returns nil when
  # no otrail has been set.
  def each(&block)
    return nil if @otrail.nil?

    @otrail.each(&block)
  end
end
|
|
570
|
+
|
|
571
|
+
# Outcome of pairing start and stop markers into ORFs. Holds four lists,
# each beginning as an empty Array:
#
# start_stop_pairs::      pairs of markers
# initial_start_markers:: start markers
# final_start_markers::   start markers
# initial_stop_markers::  stop markers
class ORFsResult
  attr_accessor :start_stop_pairs
  attr_accessor :final_start_markers
  attr_accessor :initial_start_markers
  attr_accessor :initial_stop_markers

  def initialize
    @start_stop_pairs, @initial_start_markers, @final_start_markers, @initial_stop_markers = [], [], [], []
  end
end
|
|
581
|
+
|
|
582
|
+
# Keeps a stack of paths still to be explored while walking the graph.
# NOTE(review): DS::Stack is presumably last-in-first-out - confirm against
# the DS library.
class AllProblemTrailsFinder
  include Bio::FinishM::Logging

  # graph::         the graph handed to each path when asking for neighbours
  # initial_paths:: paths pushed onto the stack, in the order given
  def initialize(graph, initial_paths)
    @stack = DS::Stack.new
    initial_paths.each { |initial_path| @stack.push initial_path }
    @graph = graph
  end

  # Remove a path from the stack and return it.
  def pop
    @stack.pop
  end

  # Number of paths currently on the stack.
  def size
    @stack.size
  end

  # For every neighbour of the last node of +current_path+, push a copy of
  # the path extended by that neighbour. +current_path+ itself is not
  # pushed.
  def push_next_neighbours(current_path)
    neighbours = current_path.neighbours_of_last_node(@graph)
    if log.debug?
      log.debug "Pushing #{neighbours.length} new neighbours of #{current_path.last}"
    end
    # TODO: not necessary to copy all paths, can just continue one of them
    neighbours.each do |neighbour|
      log.debug "Pushing neighbour to stack: #{neighbour}" if log.debug?
      extended_path = current_path.copy
      extended_path.add_oriented_node neighbour
      @stack.push extended_path
    end
  end
end
|
|
613
|
+
end
|
|
614
|
+
end
|
|
615
|
+
end
|