finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
|
|
2
|
+
module Bio
|
|
3
|
+
module Velvet
|
|
4
|
+
class Graph
|
|
5
|
+
# Return an Array of OrientedNode objects corresponding to all the nodes that neighbour the given node
|
|
6
|
+
# in the graph.
|
|
7
|
+
# Collect the oriented neighbours of +node+ when it is traversed in the
# direction given by +first_side+.
#
# node:: the graph node whose neighbours are wanted
# first_side:: OrientedNodeTrail::START_IS_FIRST to look at neighbours_off_end,
#              anything else to look at neighbours_into_start
#
# Returns an Array of OrientedNode objects, one per connecting arc. Raises a
# RuntimeError for self-loop arcs whose direction flags are inconsistent with
# the direction of travel.
def neighbours_of(node, first_side)
  start_first = OrientedNodeTrail::START_IS_FIRST
  end_first = OrientedNodeTrail::END_IS_FIRST
  forwards = (first_side == start_first)
  candidates = forwards ? neighbours_off_end(node) : neighbours_into_start(node)

  candidates.each_with_object([]) do |neighbour, oriented_neighbours|
    arcs = get_arcs_by_node(node, neighbour)

    # Two nodes can be connected twice, once in each direction. Keep only
    # the arcs that leave +node+ on the side we are travelling towards.
    if arcs.length > 1
      arcs = arcs.select do |arc|
        if forwards
          (arc.begin_node_id == node.node_id && arc.begin_node_direction) ||
            (arc.end_node_id == node.node_id && !arc.end_node_direction)
        else
          (arc.end_node_id == node.node_id && arc.end_node_direction) ||
            (arc.begin_node_id == node.node_id && !arc.begin_node_direction)
        end
      end
    end

    # Rarely, two nodes are joined by more than one arc; each yields a neighbour.
    arcs.each do |arc|
      side =
        if arc.begin_node_id == arc.end_node_id
          # A node connecting to itself. Happens rarely.
          #   both flags agree         -> re-enter in the direction of travel
          #   begin flag == forwards   -> re-enter in the opposite direction
          #   otherwise                -> not expected to occur
          begins_forward = arc.begin_node_direction ? true : false
          ends_forward = arc.end_node_direction ? true : false
          if begins_forward == ends_forward
            forwards ? start_first : end_first
          elsif begins_forward == forwards
            forwards ? end_first : start_first
          else
            raise "I don't think this is supposed to be possible. Programming error?"
          end
        elsif arc.begin_node_id == neighbour.node_id
          # connected to a different node, the 1st in the arc's pair
          arc.begin_node_direction ? end_first : start_first
        elsif arc.end_node_id == neighbour.node_id
          # connected to a different node, the 2nd in the arc's pair
          arc.end_node_direction ? start_first : end_first
        end

      oriented = OrientedNodeTrail::OrientedNode.new
      oriented.node = neighbour
      oriented.first_side = side
      oriented_neighbours.push oriented
    end
  end
end
|
|
90
|
+
|
|
91
|
+
# Like #neighbours_of, except takes a velvet node id rather than a node object
|
|
92
|
+
# Look up the node stored under +node_id+ in @nodes and return its oriented
# neighbours, exactly as #neighbours_of would for that node.
def neighbours_of_node_id(node_id, first_side)
  node = @nodes[node_id]
  neighbours_of(node, first_side)
end
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# An ordered list of nodes, each with an orientation along that trail
|
|
99
|
+
class OrientedNodeTrail
|
|
100
|
+
# Enumerable is driven by #each; Logging provides the `log` used by #sequence
include Enumerable
include Bio::Velvet::Logging

# The Array of OrientedNode objects making up this trail, in order
attr_accessor(:trail)

# Orientation markers stored in OrientedNode#first_side
START_IS_FIRST = :start_is_first
END_IS_FIRST = :end_is_first
|
|
107
|
+
|
|
108
|
+
# Raised when a textual trail definition cannot be resolved to a single trail.
# Derives from StandardError (not Exception) so that a plain `rescue` catches it.
class IllDefinedTrailDefinition < StandardError; end
# Raised by #sequence when the nodes are too short to determine the sequence.
class InsufficientLengthException < StandardError; end
|
|
110
|
+
|
|
111
|
+
# initialize a new path. If an array is given, each element should be a pair:
|
|
112
|
+
# first element of the pair is a node, and the second true/false or
|
|
113
|
+
# START_IS_FIRST/END_IS_FIRST
|
|
114
|
+
# Build a trail from +node_pairs+, an Array of [node, direction] pairs.
# direction may be true / START_IS_FIRST or false / END_IS_FIRST.
# Raises a RuntimeError if a pair holds anything else, or if the node is not
# a Bio::Velvet::Graph::Node.
def initialize(node_pairs=[])
  acceptable_directions = [true, false, START_IS_FIRST, END_IS_FIRST]
  @trail = []
  node_pairs.each do |pair|
    node, dir = pair[0], pair[1]
    if !node.kind_of?(Bio::Velvet::Graph::Node) || !acceptable_directions.include?(dir)
      raise "Bad initialisation of OrientedNodeTrail, with #{node_pairs.inspect}, particularly #{pair.inspect}"
    end

    # Booleans are shorthand for the orientation constants
    side = case dir
           when true then START_IS_FIRST
           when false then END_IS_FIRST
           else dir
           end

    onode = OrientedNode.new
    onode.node = node
    onode.first_side = side
    @trail.push onode
  end
end
|
|
134
|
+
|
|
135
|
+
# Build a trail from a shorthand string such as '1s,2e,3s': comma-separated
# stones, each a node ID followed by 's' (START_IS_FIRST) or 'e' (END_IS_FIRST).
#
# path_string:: the shorthand description; whitespace around stones is ignored
# graph:: object whose #nodes can be indexed by integer node ID
#
# Returns a new trail (empty when path_string contains no stones). Raises
# IllDefinedTrailDefinition when a stone cannot be parsed or names a node that
# is not in the graph.
def self.create_from_shorthand(path_string, graph)
  stones = path_string.split(',').collect { |s| s.strip }
  return self.new if stones.empty?

  trail = stones.collect do |stone|
    # \A and \z anchor the whole stone; ^ and $ would accept a stone that
    # merely contains a valid line, e.g. "1s\nbogus"
    matches = stone.match(/\A(\d+)([se])\z/)
    if matches.nil?
      raise IllDefinedTrailDefinition, "Unable to underestand shorthand #{stone}"
    end

    node = graph.nodes[matches[1].to_i]
    if node.nil?
      raise IllDefinedTrailDefinition, "Unable to find node #{matches[1]} in the graph, cannot continue"
    end

    onode = OrientedNode.new
    onode.node = node
    onode.first_side = (matches[2] == 's' ? START_IS_FIRST : END_IS_FIRST)
    onode
  end

  path = self.new
  path.trail = trail
  return path
end
|
|
160
|
+
|
|
161
|
+
# Given a string like '2,3,4' (super-shorthand form),
|
|
162
|
+
# return the OrientedNodeTrail that this defines. Raises
|
|
163
|
+
# an IllDefinedTrailDefinition exception if there is any ambiguity.
|
|
164
|
+
# Build a trail from a super-shorthand string such as '2,3,4' (node IDs only),
# inferring each node's orientation from the graph's connectivity.
#
# path_string:: comma-separated node IDs; whitespace around IDs is ignored
# graph:: the graph passed to OrientedNode#next_neighbours, whose #nodes can
#         be indexed by integer node ID
#
# Returns a new trail (empty when path_string contains no stones).
# Raises IllDefinedTrailDefinition when only one node is given, when one of the
# first two nodes is not in the graph, when consecutive nodes are not connected,
# or when the connection is ambiguous. Raises a RuntimeError when a stone is
# not a plain integer (kept as RuntimeError for backwards compatibility).
def self.create_from_super_shorthand(path_string, graph)
  stones = path_string.split(',').collect { |s| s.strip }
  return self.new if stones.empty?
  if stones.length == 1
    raise IllDefinedTrailDefinition, "Cannot know path orientation when only one node is given"
  end

  trail = []
  stones.each_with_index do |stone, index|
    # \A and \z anchor the whole stone so that embedded newlines are rejected
    unless stone =~ /\A[0-9]+\z/
      raise "Unable to parse stepping stone along the path: `#{stone}'. Entire path was `#{path_string}'."
    end

    if index == 0
      # The first node's orientation is only known once the second is seen
      next
    elsif index == 1
      # Determine the direction of the first two nodes
      first, second = stones[0..1].collect do |id_string|
        node = graph.nodes[id_string.to_i]
        if node.nil?
          raise IllDefinedTrailDefinition, "Node `#{id_string}' from #{path_string} does not appear to be a node ID in the graph"
        end
        OrientedNode.new(node, START_IS_FIRST)
      end
      rev_first = OrientedNode.new(first.node, END_IS_FIRST)

      seconds_from_first = first.next_neighbours(graph).select { |n| n.node_id == second.node_id }
      seconds_from_rev_first = rev_first.next_neighbours(graph).select { |n| n.node_id == second.node_id }

      if !seconds_from_first.empty? && !seconds_from_rev_first.empty?
        raise IllDefinedTrailDefinition, "Both start and end of first node connect to second node, I'm confused."
      elsif seconds_from_first.empty? && seconds_from_rev_first.empty?
        raise IllDefinedTrailDefinition, "First and second nodes do not appear to be directly connected"
      end

      # Exactly one orientation of the first node reaches the second
      origin, seconds =
        if seconds_from_first.empty?
          [rev_first, seconds_from_rev_first]
        else
          [first, seconds_from_first]
        end
      if seconds.length > 1
        raise IllDefinedTrailDefinition, "first node connects to both start and end of second node, I'm confused."
      end
      trail.push origin
      trail.push seconds[0]
    else
      # we are at the third or later node in the path
      last = trail[-1]
      wanted_id = stone.to_i
      nexts = last.next_neighbours(graph).select { |n| n.node_id == wanted_id }
      if nexts.empty?
        raise IllDefinedTrailDefinition, "Nodes #{last} and #{stone} do not appear to be connected"
      elsif nexts.length > 1
        raise IllDefinedTrailDefinition, "Node #{last} connects to both the start and end of #{stone}, I'm confused"
      end
      trail.push nexts[0]
    end
  end

  to_return = OrientedNodeTrail.new
  to_return.trail = trail
  return to_return
end
|
|
244
|
+
|
|
245
|
+
# Add a node to the trail. start_or_end is either
|
|
246
|
+
# OrientedNodeTrail::START_IS_FIRST or OrientedNodeTrail::END_IS_FIRST
|
|
247
|
+
# Append +node+ to the end of the trail with the given orientation.
# start_or_end must be START_IS_FIRST or END_IS_FIRST, otherwise a
# RuntimeError is raised. Returns the trail Array.
def add_node(node, start_or_end)
  possible_orientations = [START_IS_FIRST, END_IS_FIRST]
  if !possible_orientations.include?(start_or_end)
    raise "Unexpected orientation in node trail. Need one of #{possible_orientations.inspect}, found #{start_or_end}"
  end

  onode = OrientedNode.new
  onode.first_side = start_or_end
  onode.node = node
  @trail.push(onode)
end
|
|
257
|
+
|
|
258
|
+
# Append an already-built OrientedNode to the end of the trail.
# Returns the trail Array.
def add_oriented_node(oriented_node)
  @trail << oriented_node
end
|
|
261
|
+
|
|
262
|
+
# Given an Array of [node_id, start_or_end] pairs
|
|
263
|
+
# add these to the trail
|
|
264
|
+
# Append each [node_id, start_or_end] pair to the trail via #add_node,
# looking the node up in graph.nodes. Raises a RuntimeError if an entry
# does not have exactly two elements.
def add_setabled_nodes(setabled_nodes, graph)
  setabled_nodes.each do |pair|
    raise "programming error" unless pair.length == 2
    node_id = pair[0]
    orientation = pair[1]
    add_node(graph.nodes[node_id], orientation)
  end
end
|
|
270
|
+
|
|
271
|
+
# Yield each OrientedNode of the trail in order (this is what Enumerable uses).
def each(&block)
  oriented_nodes = @trail
  oriented_nodes.each(&block)
end
|
|
274
|
+
|
|
275
|
+
# The final OrientedNode of the trail, or nil if the trail is empty.
def last
  @trail.last
end
|
|
278
|
+
|
|
279
|
+
# Remove the final OrientedNode from the trail and return it
# (nil if the trail is empty).
def remove_last_node
  removed = @trail.pop
  removed
end
|
|
282
|
+
|
|
283
|
+
# Remove and return the OrientedNode at +index+ in the trail.
def delete_at(index)
  removed = @trail.delete_at(index)
  removed
end
|
|
286
|
+
|
|
287
|
+
# Number of nodes in the trail.
def length
  @trail.size
end
|
|
290
|
+
|
|
291
|
+
# Index into the trail exactly as Array#[] does.
def [](index)
  @trail.slice(index)
end
|
|
294
|
+
|
|
295
|
+
# Return true if the path contains the oriented
|
|
296
|
+
# node
|
|
297
|
+
# True if some element of the trail is == to +oriented_node+.
def include_oriented_node?(oriented_node)
  !@trail.index(oriented_node).nil?
end
|
|
300
|
+
|
|
301
|
+
# Return a list of OrientedNode objects, one for each neighbour
|
|
302
|
+
# of the last node in this path (in the correct direction)
|
|
303
|
+
# Ask +graph+ for the oriented neighbours of the trail's final node,
# in the direction the trail leaves that node.
def neighbours_of_last_node(graph)
  tail = last
  graph.neighbours_of(tail.node, tail.first_side)
end
|
|
306
|
+
|
|
307
|
+
# Return the sequence of the entire trail, or an empty string if there are no
|
|
308
|
+
# nodes in the trail. For certain (small) configurations of (short) nodes, there may
|
|
309
|
+
# be insufficient information to uniquely determine the sequence of the trail.
|
|
310
|
+
# In that case an exception is thrown.
|
|
311
|
+
# Sequence of the whole trail as a String ('' for an empty trail).
# The result is the first (hash_length - 1) bases of the reverse complement of
# the twin-strand sequence, followed by the forward sequence from
# #sequences_within_path. Raises InsufficientLengthException when the twin
# sequence is shorter than hash_length - 1.
def sequence
  return '' if @trail.empty?

  fwd_nodes_sequence, twin_nodes_sequence = sequences_within_path
  missing_length_from_each_side = @trail[0].node.parent_graph.hash_length - 1
  if twin_nodes_sequence.length < missing_length_from_each_side
    raise InsufficientLengthException, "Not enough information to know the sequence of a node trail"
  end

  # Only used for the debug output below
  summed_node_lengths = @trail.collect { |n| n.node.length_alone }.reduce(:+)
  seq_length_required = summed_node_lengths + missing_length_from_each_side - twin_nodes_sequence.length
  log.debug "first part: #{twin_nodes_sequence}"
  log.debug "second: #{fwd_nodes_sequence[-seq_length_required...fwd_nodes_sequence.length] }"

  # Calculating as revcom(twin) + the last seq_length_required bases of the
  # forward sequence should be the same, but is somehow buggy in velvet?
  leading_bases = revcom(twin_nodes_sequence)[0...missing_length_from_each_side]
  return leading_bases + fwd_nodes_sequence
end
|
|
326
|
+
|
|
327
|
+
# Returns [forward_sequence, twin_sequence] for the trail.
# The forward sequence appends, node by node, ends_of_kmers_of_node (or the
# twin's, for nodes entered at their end); the twin sequence prepends the
# opposite strand's value for each node. Both are '' for an empty trail.
def sequences_within_path
  return '', '' if @trail.empty?

  @trail.reduce(['', '']) do |(fwd_so_far, twin_so_far), onode|
    node = onode.node
    if onode.starts_at_start?
      twin_so_far = node.ends_of_kmers_of_twin_node + twin_so_far
      fwd_so_far = fwd_so_far + node.ends_of_kmers_of_node
    else
      twin_so_far = node.ends_of_kmers_of_node + twin_so_far
      fwd_so_far = fwd_so_far + node.ends_of_kmers_of_twin_node
    end
    [fwd_so_far, twin_so_far]
  end
end
|
|
342
|
+
|
|
343
|
+
# Return a new OrientedNodeTrail holding a #copy of every OrientedNode,
# so that changing the copy does not change this trail.
def copy
  duplicate = OrientedNodeTrail.new
  duplicate.trail = @trail.map(&:copy)
  duplicate
end
|
|
348
|
+
|
|
349
|
+
# Human-readable form: the object id followed by the trail in shorthand.
def to_s
  shorthand = to_shorthand
  "OrientedNodeTrail: #{object_id}: #{shorthand}"
end
|
|
352
|
+
|
|
353
|
+
# Comma-separated node IDs of the trail, without orientations (e.g. "1,2,3").
def to_short_s
  node_ids = map { |onode| onode.node.node_id }
  node_ids.join(',')
end
|
|
358
|
+
|
|
359
|
+
# Same text as #to_s.
def inspect
  self.to_s
end
|
|
362
|
+
|
|
363
|
+
# Length of a contig made from this path
|
|
364
|
+
# Length in base pairs of a contig made from this trail alone: the summed
# node lengths plus (hash_length - 1). 0 for an empty trail.
def length_in_bp
  return 0 if @trail.empty?

  hash_length = @trail[0].node.parent_graph.hash_length
  length_in_bp_within_path + hash_length - 1
end
|
|
368
|
+
|
|
369
|
+
# Length of this trail if it is part of a larger path
|
|
370
|
+
# Sum of length_alone over every node in the trail (0 for an empty trail).
def length_in_bp_within_path
  return 0 if @trail.empty?

  bases = 0
  each { |onode| bases += onode.node.length_alone }
  bases
end
|
|
376
|
+
|
|
377
|
+
# The trail in shorthand form, e.g. "1s,2e": each node ID followed by 's' when
# the node starts at its start and 'e' otherwise, joined by commas.
def to_shorthand
  stones = @trail.map do |onode|
    suffix = onode.starts_at_start? ? 's' : 'e'
    "#{onode.node.node_id}#{suffix}"
  end
  stones.join(',')
end
|
|
385
|
+
|
|
386
|
+
# Reverse the trail in place: the node order is reversed and every node's
# orientation is flipped. Always returns nil.
def reverse!
  @trail.reverse!.each(&:reverse!)
  nil
end
|
|
393
|
+
|
|
394
|
+
# Return a reversed copy of this trail, leaving the receiver untouched.
def reverse
  copy.tap { |reversed| reversed.reverse! }
end
|
|
399
|
+
|
|
400
|
+
# The weighted average of coverages along the trail,
|
|
401
|
+
# (weighted by node length)
|
|
402
|
+
# Coverage averaged over the trail, each node weighted by its length_alone.
# For an empty trail this is 0.0 / 0, i.e. NaN.
def coverage
  total_coverage, total_length = inject([0.0, 0]) do |(coverage_sum, length_sum), onode|
    len = onode.node.length_alone
    [coverage_sum + onode.node.coverage * len, length_sum + len]
  end
  total_coverage / total_length
end
|
|
412
|
+
|
|
413
|
+
# Two trails are equal when they hold the same number of oriented nodes and
# each pair of corresponding nodes is ==. Anything that does not expose a
# #trail (e.g. nil) is simply not equal, rather than raising NoMethodError.
def ==(another)
  return false unless another.respond_to?(:trail)
  trail == another.trail
end
|
|
420
|
+
|
|
421
|
+
# A graph node together with the side of it that a trail reaches first.
class OrientedNode
  attr_accessor :node, :first_side

  # node:: the underlying graph node
  # first_side:: START_IS_FIRST / END_IS_FIRST, or true / false as shorthand
  #              for those two constants respectively; stored as-is otherwise
  def initialize(node=nil, first_side=nil)
    @node = node
    if first_side == true
      @first_side = OrientedNodeTrail::START_IS_FIRST
    elsif first_side == false
      @first_side = OrientedNodeTrail::END_IS_FIRST
    else
      @first_side = first_side
    end
  end

  # True when the trail enters this node at its start
  def starts_at_start?
    @first_side == OrientedNodeTrail::START_IS_FIRST
  end

  # True when the trail enters this node at its end
  def starts_at_end?
    @first_side == OrientedNodeTrail::END_IS_FIRST
  end

  def to_s
    "OrientedNode: node #{@node.node_id}, first_side: #{@first_side}"
  end

  # Shorthand form: node ID followed by 's' (start first) or 'e' (otherwise)
  def to_shorthand
    if @first_side == OrientedNodeTrail::START_IS_FIRST
      return "#{node_id}s"
    else
      return "#{node_id}e"
    end
  end

  # A plain [node_id, first_side] Array, usable as a Set member or Hash key
  def to_settable
    [@node.node_id, @first_side]
  end

  # Hash value derived from #to_settable, so equal oriented nodes hash alike
  def hash
    to_settable.hash
  end

  def node_id
    @node.node_id
  end

  # Equal when both the node and the orientation match. Objects that do not
  # look like an OrientedNode (e.g. nil) are not equal, rather than raising.
  def ==(another)
    return false unless another.respond_to?(:node) && another.respond_to?(:first_side)
    @node == another.node and @first_side == another.first_side
  end

  # Hash and Set compare keys with #eql?, not #==; without this, equal
  # OrientedNode objects built independently are treated as distinct keys
  # even though #hash agrees.
  alias_method :eql?, :==

  # Oriented neighbours reached by continuing through this node in +graph+
  def next_neighbours(graph)
    graph.neighbours_of @node, @first_side
  end

  # switch @first_side of this node
  def reverse!
    if @first_side == OrientedNodeTrail::START_IS_FIRST
      @first_side = OrientedNodeTrail::END_IS_FIRST
    elsif @first_side == OrientedNodeTrail::END_IS_FIRST
      @first_side = OrientedNodeTrail::START_IS_FIRST
    else
      raise "programming error"
    end
  end

  # Return a new OrientedNode with the reverse direction
  def reverse
    rev = OrientedNode.new(@node, @first_side)
    rev.reverse!
    return rev
  end

  # Return a new OrientedNode with the same node and direction
  def copy
    OrientedNode.new(@node, @first_side)
  end
end
|
|
499
|
+
|
|
500
|
+
private
|
|
501
|
+
# Reverse complement of the nucleotide string +seq+, in upper case.
def revcom(seq)
  nucleotides = Bio::Sequence::NA.new(seq)
  nucleotides.reverse_complement.to_s.upcase
end
|
|
504
|
+
end
|
|
505
|
+
end
|
|
506
|
+
end
|
|
507
|
+
end
|