finishm 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cde22b93c4ebc35cf5a598e1d4264104743e0168
|
4
|
+
data.tar.gz: 5e9b6e324cbe45329886c5fbf3e48236a76c0aa8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5e2df4e2e7f9e1173d607bb7980189f942ca48980728945c57e50d0b9a15e110251ac22a93b289c94777d8277f79af381d2a4aa6957da981fd5b705c69a8674a
|
7
|
+
data.tar.gz: d3454311875a095f19da752d016b0bc954af6829d627480e416c071296103b59b9c448df335f53801d962f4a1c0433591ef16ce4bd5cc8e1f6c95133855f1435
|
data/.document
ADDED
data/.gitmodules
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/Gemfile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
# Example:
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
5
|
+
gem 'bio-ipcress'
|
6
|
+
gem 'bio-logger'
|
7
|
+
gem 'bio'
|
8
|
+
gem 'progressbar'
|
9
|
+
gem 'bio-samtools'
|
10
|
+
gem 'ruby-graphviz'
|
11
|
+
gem 'ds'
|
12
|
+
gem 'hopcsv', '~> 0.4'
|
13
|
+
gem 'bio-velvet', '~>0.6'
|
14
|
+
gem 'bio-velvet_underground', '~>0.3'
|
15
|
+
gem 'ruby-progressbar'
|
16
|
+
gem 'yargraph', '~>0.0.4'
|
17
|
+
|
18
|
+
#only needed temporarily until the bio-velvet gem is a proper dependency
|
19
|
+
gem 'files'
|
20
|
+
|
21
|
+
# Add dependencies to develop your gem here.
|
22
|
+
# Include everything needed to run rake, tests, features, etc.
|
23
|
+
group :development do
|
24
|
+
gem "rspec", ">= 2.8.0"
|
25
|
+
gem "yard", ">= 0.7"
|
26
|
+
gem "rdoc", ">= 3.12"
|
27
|
+
gem "bundler", ">= 1.0.0"
|
28
|
+
gem 'jeweler'
|
29
|
+
gem 'bio-commandeer'
|
30
|
+
gem 'pry'
|
31
|
+
end
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Ben J. Woodcroft
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
__WARNING__! FinishM is very alpha software and not ready for prime time. There are many unfinished parts of it, and many bugs. Please use with care, and don't judge the authors too harshly.
|
2
|
+
|
3
|
+
# FinishM
|
4
|
+
|
5
|
+
FinishM attempts to improve draft genomes by considering the computational problem to be about finishing, not assembly in the traditional sense.
|
6
|
+
|
7
|
+
## A finishing approach to assembly
|
8
|
+
Metagenome and isolate assemblers generate contigs from reads, but still leave valuable information on the table. FinishM exploits this information to improve/finish a draft genome without any further laboratory-based work.
|
9
|
+
|
10
|
+
In even a moderately successful assembly, the resultant contigs constitute the vast majority of the genome being sequenced, but this fact is ignored by assemblers. Unlike a traditional assembler, FinishM does not attempt to directly extend contigs, but instead focuses on connecting already assembled contigs.
|
11
|
+
|
12
|
+
FinishM has several modes:
|
13
|
+
* Attempt to improve a genome. See `finishm roundup`. This mode fulfills both the `wander` and `gapfill` modes.
|
14
|
+
* Determine which contig ends are connected in the assembly graph. See `finishm wander`.
|
15
|
+
* FinishM 'gapfills' (replaces N characters) using a graph-theoretic approach that appears to outperform current gapfilling programs. See `finishm gapfill`.
|
16
|
+
* Sometimes a human is better able to interpret an assembly graph than a machine. FinishM creates human interpretable graph visualisations that let humans solve assembly problems. See `finishm visualise`.
|
17
|
+
* Some other experimental _de-novo_ (non-finishing) metagenome assembly techniques are implemented in `finishm assemble`.
|
18
|
+
|
19
|
+
## Installation
|
20
|
+
|
21
|
+
First, you'll need Ruby (FinishM is tested on 2.1). Then to install:
|
22
|
+
```sh
|
23
|
+
gem install finishm
|
24
|
+
```
|
25
|
+
|
26
|
+
FinishM also has some external dependencies:
|
27
|
+
* clustalo (for the `gapfill`/`roundup` modes)
|
28
|
+
* GraphViz (for the `visualise` mode)
|
29
|
+
|
30
|
+
## Usage
|
31
|
+
After installation, run `finishm` with no arguments to see a listing of the modes and their usage:
|
32
|
+
```sh
|
33
|
+
finishm
|
34
|
+
```
|
35
|
+
|
36
|
+
## Developing
|
37
|
+
To hack on finishm:
|
38
|
+
```
|
39
|
+
git clone https://github.com/wwood/finishm.git
|
40
|
+
cd finishm
|
41
|
+
bundle install
|
42
|
+
git submodule update --init
|
43
|
+
cd ext/src
|
44
|
+
git checkout -b finishm origin/finishm #possibly this step is not required for newer versions of git
|
45
|
+
make MAXKMERLENGTH=255 finishm velveth velvetg
|
46
|
+
cp obj/shared/libfinishm.so.1.0 ../../lib/external/
|
47
|
+
cd ../..
|
48
|
+
./bin/finishm -h
|
49
|
+
```
|
50
|
+
|
51
|
+
## Citation
|
52
|
+
|
53
|
+
A manuscript describing FinishM is currently in preparation. However, FinishM reuses code from velvet and BioRuby, so those tools may be worth citing.
|
54
|
+
|
55
|
+
## Copyright
|
56
|
+
|
57
|
+
Copyright (c) 2012-2014 Ben J. Woodcroft. See LICENSE.txt for
|
58
|
+
further details.
|
59
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "finishm"
|
18
|
+
gem.homepage = "http://github.com/wwood/finishm"
|
19
|
+
gem.license = "GPL"
|
20
|
+
gem.summary = %Q{Genome improvement and finishing with or without further sequencing effort}
|
21
|
+
gem.description = %Q{De-novo assemblies generally only provide draft genomes. FinishM is aimed at improving these draft assemblies.}
|
22
|
+
gem.email = "donttrustben near gmail.com"
|
23
|
+
gem.authors = ["Ben J. Woodcroft"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
|
26
|
+
gem.extensions = "ext/mkrf_conf.rb"
|
27
|
+
|
28
|
+
# by default, velvet as a git submodule is not included when making the gem
|
29
|
+
# but we need it to be.
|
30
|
+
gem.files.include "ext/src/src/*"
|
31
|
+
gem.files.include "ext/src/Makefile"
|
32
|
+
gem.files.include "ext/src/License"
|
33
|
+
gem.files.include "ext/src/third-party/**/*"
|
34
|
+
end
|
35
|
+
Jeweler::RubygemsDotOrgTasks.new
|
36
|
+
|
37
|
+
require 'rspec/core'
|
38
|
+
require 'rspec/core/rake_task'
|
39
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
40
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
41
|
+
end
|
42
|
+
|
43
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
44
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
45
|
+
spec.rcov = true
|
46
|
+
end
|
47
|
+
|
48
|
+
task :default => :spec
|
49
|
+
|
50
|
+
require 'yard'
|
51
|
+
YARD::Rake::YardocTask.new
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
@@ -0,0 +1,106 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'bio-logger'
|
5
|
+
require 'bio-velvet'
|
6
|
+
require 'graphviz'
|
7
|
+
require 'bio'
|
8
|
+
require 'set'
|
9
|
+
|
10
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
|
11
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
12
|
+
require 'priner'
|
13
|
+
|
14
|
+
# Parse command line options into the options hash
|
15
|
+
options = {
|
16
|
+
:logger => 'stderr',
|
17
|
+
:log_level => 'info',
|
18
|
+
:start_kmers => [],
|
19
|
+
:end_kmers => [],
|
20
|
+
:coverage_cutoff => 0.0,
|
21
|
+
}
|
22
|
+
o = OptionParser.new do |opts|
|
23
|
+
opts.banner = "
|
24
|
+
Usage: #{SCRIPT_NAME} --velvet-graph PreGraphFile [options]
|
25
|
+
|
26
|
+
Take a graph pre-computed with velveth, and output a GraphViz file for visualisation.
|
27
|
+
|
28
|
+
Overlayed on top of this graph can be added information e.g. nodes that contain
|
29
|
+
particular kmers get coloured.
|
30
|
+
\n\n"
|
31
|
+
|
32
|
+
opts.on("--velvet-graph GRAPH_FILE", "PreGraph file output from velveth [required]") do |arg|
|
33
|
+
options[:velvet_pregraph_file] = arg
|
34
|
+
end
|
35
|
+
|
36
|
+
opts.separator "\nOptional arguments:\n\n"
|
37
|
+
opts.on("--dot OUTPUT_DOT_FILENAME", "Output the graph into PNG format [default: not output as DOT]") do |arg|
|
38
|
+
options[:dot_output_file] = arg
|
39
|
+
end
|
40
|
+
opts.on("--png OUTPUT_PNG_FILENAME", "Output the graph into PNG format [default: not output as PNG]") do |arg|
|
41
|
+
options[:neato_png] = arg
|
42
|
+
end
|
43
|
+
opts.on("--start-kmers-file FILE", "Path to file containing newline-separated kmers that are associated with the start of the assembly [default: none]") do |arg|
|
44
|
+
options[:start_kmers] = File.read(arg).split(/\s+/) # File.read closes the handle, unlike File.open(arg).read
|
45
|
+
end
|
46
|
+
opts.on("--end-kmers-file FILE", "Path to file containing newline-separated kmers that are associated with the end of the assembly [default: none]") do |arg|
|
47
|
+
options[:end_kmers] = File.read(arg).split(/\s+/) # File.read closes the handle, unlike File.open(arg).read
|
48
|
+
end
|
49
|
+
opts.on("--coverage-cutoff NUMBER", "Require at least this much coverage, otherwise the node and associated edges are not shown in the output [default: #{options[:coverage_cutoff]}]") do |arg|
|
50
|
+
options[:coverage_cutoff] = arg.to_f
|
51
|
+
end
|
52
|
+
|
53
|
+
# logger options
|
54
|
+
opts.separator "\nVerbosity:\n\n"
|
55
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
56
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
57
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
58
|
+
end; o.parse!
|
59
|
+
if ARGV.length != 0 or options[:velvet_pregraph_file].nil?
|
60
|
+
$stderr.puts o
|
61
|
+
exit 1
|
62
|
+
end
|
63
|
+
if !(options[:dot_output_file] or options[:neato_png])
|
64
|
+
$stderr.puts "Need to specify an output format with e.g. --png or --dot"
|
65
|
+
exit 1
|
66
|
+
end
|
67
|
+
# Setup logging
|
68
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
69
|
+
|
70
|
+
|
71
|
+
log.info "Parsing graph from #{options[:velvet_pregraph_file]}"
|
72
|
+
graph = Bio::Velvet::Graph.parse_from_file(options[:velvet_pregraph_file])
|
73
|
+
log.info "Finished parsing graph, found #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"
|
74
|
+
|
75
|
+
if options[:start_kmers].length > 0
|
76
|
+
log.info "Read in #{options[:start_kmers].length} kmers associated with the start of the assembly"
|
77
|
+
end
|
78
|
+
if options[:end_kmers].length > 0
|
79
|
+
log.info "Read in #{options[:end_kmers].length} kmers associated with the end of the assembly"
|
80
|
+
end
|
81
|
+
list_of_start_kmers = options[:start_kmers].collect{|k| [k, Bio::Sequence::NA.new(k).reverse_complement.to_s]}.flatten
|
82
|
+
list_of_end_kmers = options[:end_kmers].collect{|k| [k, Bio::Sequence::NA.new(k).reverse_complement.to_s]}.flatten
|
83
|
+
|
84
|
+
if options[:coverage_cutoff]
|
85
|
+
cutter = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
|
86
|
+
log.info "Removing low coverage (<#{options[:coverage_cutoff]}) nodes"
|
87
|
+
cutter.remove_low_coverage_nodes(graph, options[:coverage_cutoff])
|
88
|
+
log.info "After removing low coverage nodes, there is #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"
|
89
|
+
end
|
90
|
+
|
91
|
+
viser = Bio::Assembly::ABVisualiser.new
|
92
|
+
log.info "Converting assembly to GraphViz format"
|
93
|
+
graphviz = viser.graphviz(graph, {:start_kmers => list_of_start_kmers, :end_kmers => list_of_end_kmers})
|
94
|
+
|
95
|
+
# Print
|
96
|
+
log.info "Printing assembly graph"
|
97
|
+
if options[:dot_output_file]
|
98
|
+
graphviz.output :dot => options[:dot_output_file]
|
99
|
+
end
|
100
|
+
if options[:neato_png]
|
101
|
+
graphviz.output :png => options[:neato_png]
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
@@ -0,0 +1,73 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'bio-logger'
|
5
|
+
|
6
|
+
$:.unshift File.join(ENV['HOME'],'git','bioruby-primer3','lib')
|
7
|
+
require 'bio-primer3'
|
8
|
+
|
9
|
+
if __FILE__ == $0 #needs to be removed if this script is distributed as part of a rubygem
|
10
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
|
11
|
+
|
12
|
+
# Parse command line options into the options hash
|
13
|
+
options = {
|
14
|
+
:logger => 'stderr',
|
15
|
+
}
|
16
|
+
o = OptionParser.new do |opts|
|
17
|
+
opts.banner = "
|
18
|
+
Usage: #{SCRIPT_NAME} -p1 <primer1> -f2 <primer_list_file>
|
19
|
+
|
20
|
+
Uses primer3's \"check primers\" to find whether primers match against each other\n\n"
|
21
|
+
|
22
|
+
opts.on("--primer1 PRIMER", "Primer on one side [required]") do |arg|
|
23
|
+
options[:primer1] = arg
|
24
|
+
end
|
25
|
+
opts.on("--primers2 PRIMER_FILE", "A list of primers in a file, newline separated [required]") do |arg|
|
26
|
+
options[:primers2_file] = arg
|
27
|
+
end
|
28
|
+
|
29
|
+
# logger options
|
30
|
+
opts.separator "\nVerbosity:\n\n"
|
31
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {Bio::Log::CLI.trace('error')}
|
32
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
33
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| Bio::Log::CLI.trace(s)}
|
34
|
+
end; o.parse!
|
35
|
+
if ARGV.length != 0
|
36
|
+
$stderr.puts o
|
37
|
+
exit 1
|
38
|
+
end
|
39
|
+
# Setup logging. bio-logger defaults to STDERR not STDOUT, I disagree
|
40
|
+
Bio::Log::CLI.logger(options[:logger]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
41
|
+
|
42
|
+
|
43
|
+
# Read in input data
|
44
|
+
primers1 = [options[:primer1]]
|
45
|
+
primers2 = File.read(options[:primers2_file]).split("\n").collect{|c| c.strip} # File.read closes the handle after reading
|
46
|
+
log.info "Read in #{primers1.length} left primers and #{primers2.length} right primers e.g. #{primers1[0]} and #{primers2[0]}"
|
47
|
+
|
48
|
+
goods = 0
|
49
|
+
bads = 0
|
50
|
+
failed_to_run = 0
|
51
|
+
primers1.each do |primer1|
|
52
|
+
primers2.each do |primer2|
|
53
|
+
begin
|
54
|
+
result, obj = Bio::Primer3.test_primer_compatibility primer1, primer2, 'PRIMER_EXPLAIN_FLAG'=>1
|
55
|
+
|
56
|
+
puts [
|
57
|
+
primer1, primer2, result, obj['PRIMER_LEFT_EXPLAIN'], obj['PRIMER_RIGHT_EXPLAIN']
|
58
|
+
].join "\t"
|
59
|
+
|
60
|
+
if result
|
61
|
+
goods += 1
|
62
|
+
else
|
63
|
+
bads += 1
|
64
|
+
end
|
65
|
+
|
66
|
+
rescue StandardError => e # do not swallow Interrupt/SystemExit; only count ordinary Primer3 failures
|
67
|
+
failed_to_run += 1
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
log.info "Found #{goods} OK primer pairs and #{bads} not OK primer pairs"
|
72
|
+
log.warn "#{failed_to_run} weren't checked by Primer3 because it failed to run" if failed_to_run > 0
|
73
|
+
end #end if running as a script
|
@@ -0,0 +1,244 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'bio-logger'
|
5
|
+
require 'bio-velvet'
|
6
|
+
require 'tempfile'
|
7
|
+
require 'pp'
|
8
|
+
|
9
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
|
10
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
11
|
+
require 'priner'
|
12
|
+
|
13
|
+
# Parse command line options into the options hash
|
14
|
+
options = {
|
15
|
+
:logger => 'stderr',
|
16
|
+
:log_level => 'info',
|
17
|
+
:velvet_kmer_size => 73,#TODO: these options should be exposed to the user, and perhaps not guessed at
|
18
|
+
:velvetg_arguments => '-read_trkg yes',# -exp_cov 41 -cov_cutoff 12.0973243610491', #hack
|
19
|
+
:contig_end_length => 300,
|
20
|
+
:output_assembly_path => 'velvetAssembly',
|
21
|
+
:graph_search_leash_length => 3000,
|
22
|
+
}
|
23
|
+
o = OptionParser.new do |opts|
|
24
|
+
opts.banner = "
|
25
|
+
Usage: #{SCRIPT_NAME} --reads <read_file> --contigs <contigs_file>
|
26
|
+
|
27
|
+
Takes a set of reads and a set of contigs. Then it runs an assembly based on those reads,
|
28
|
+
and tries to fill in possible gaps between the contigs. There may be multiple ways
|
29
|
+
to join two contig ends together - in this that multiple cases are reported. \n\n"
|
30
|
+
|
31
|
+
|
32
|
+
opts.on("--reads FILE", "gzipped fastq file of reads to perform the re-assembly with [required]") do |arg|
|
33
|
+
options[:reads_file] = arg
|
34
|
+
end
|
35
|
+
opts.on("--contigs FILE", "fasta file of contigs to be joined together [required]") do |arg|
|
36
|
+
options[:contigs_file] = arg
|
37
|
+
end
|
38
|
+
|
39
|
+
opts.separator "\nOptional arguments:\n\n"
|
40
|
+
opts.on("--output-trails-fasta PATH", "Output found paths to this file in fasta format [default: off]") do |arg|
|
41
|
+
options[:overall_trail_output_fasta_file] = arg
|
42
|
+
end
|
43
|
+
opts.on("--already-assembled-velvet-directory PATH", "Skip until after assembly in this process, and start from this assembly directory created during a previous run of this script [default: off]") do |arg|
|
44
|
+
options[:previous_assembly] = arg
|
45
|
+
end
|
46
|
+
opts.on("--serialize-velvet-graph FILE", "So that the velvet graph does not have to be reparsed, serialise the parsed object for later use in this file [default: off]") do |arg|
|
47
|
+
options[:serialize_parsed_graph_file] = arg
|
48
|
+
end
|
49
|
+
opts.on("--already-serialized-velvet-graph FILE", "Restore the parsed velvet graph from this file [default: off]") do |arg|
|
50
|
+
options[:previously_serialized_parsed_graph_file] = arg
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
# logger options
|
55
|
+
opts.separator "\nVerbosity:\n\n"
|
56
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
57
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
58
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
59
|
+
end; o.parse!
|
60
|
+
if ARGV.length != 0 or options[:reads_file].nil? or options[:contigs_file].nil?
|
61
|
+
$stderr.puts o
|
62
|
+
exit 1
|
63
|
+
end
|
64
|
+
# Setup logging
|
65
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
66
|
+
Bio::Log::LoggerPlus.new 'bio-velvet'; Bio::Log::CLI.configure 'bio-velvet'
|
67
|
+
|
68
|
+
# Extract contig ends from each of the input contigs, so that the contig ends can be found in the
|
69
|
+
# assembly graph structure.
|
70
|
+
contig_ends = []
|
71
|
+
velvet_sequence_id_to_contig_end = {}
|
72
|
+
contig_lengths = {}
|
73
|
+
class ContigEnd
|
74
|
+
attr_accessor :sequence, :start_or_end, :contig_name, :velvet_sequence_id
|
75
|
+
end
|
76
|
+
velvet_read_index = 1
|
77
|
+
Bio::FlatFile.foreach(options[:contigs_file]) do |seq|
|
78
|
+
contig_lengths[seq.definition] = seq.seq.length
|
79
|
+
if seq.seq.length < options[:contig_end_length]
|
80
|
+
log.warn "Contig #{seq.definition} is shorter than the end length used to anchor the contig in the assembly. This is not ideal but may be ok."
|
81
|
+
#TODO: fix this - should be counting from the middle. Should I just ignore those ones?
|
82
|
+
end
|
83
|
+
# Add the start of the contig
|
84
|
+
contig_end = ContigEnd.new
|
85
|
+
contig_end.start_or_end = :start
|
86
|
+
contig_end.sequence = Bio::Sequence::NA.new(seq.seq[0...options[:contig_end_length]]).reverse_complement.to_s
|
87
|
+
contig_end.contig_name = seq.definition
|
88
|
+
velvet_sequence_id_to_contig_end[velvet_read_index] = contig_end
|
89
|
+
contig_end.velvet_sequence_id = velvet_read_index; velvet_read_index += 1
|
90
|
+
contig_ends.push contig_end
|
91
|
+
|
92
|
+
|
93
|
+
# Add the back of the contig
|
94
|
+
contig_end = ContigEnd.new
|
95
|
+
contig_end.start_or_end = :end
|
96
|
+
s = seq.seq
|
97
|
+
contig_end.sequence = s[s.length-options[:contig_end_length]...s.length]
|
98
|
+
contig_end.contig_name = seq.definition
|
99
|
+
velvet_sequence_id_to_contig_end[velvet_read_index] = contig_end
|
100
|
+
contig_end.velvet_sequence_id = velvet_read_index; velvet_read_index += 1
|
101
|
+
contig_ends.push contig_end
|
102
|
+
end
|
103
|
+
log.info "Parsed in #{contig_ends.length} contig ends from the two sides of each input contig"
|
104
|
+
|
105
|
+
|
106
|
+
graph = nil
|
107
|
+
if options[:previously_serialized_parsed_graph_file].nil?
|
108
|
+
velvet_result = nil
|
109
|
+
if options[:previous_assembly].nil? #If assembly has not already been carried out
|
110
|
+
Tempfile.open('anchors.fa') do |tempfile|
|
111
|
+
contig_ends.each do |contig_end|
|
112
|
+
tempfile.puts ">anchor#{contig_end.velvet_sequence_id}"
|
113
|
+
tempfile.puts contig_end.sequence
|
114
|
+
end
|
115
|
+
|
116
|
+
log.info "Assembling sampled reads with velvet"
|
117
|
+
# Bit of a hack, but have to use -short1 as the anchors because then start and end anchors will have node IDs 1,2,... etc.
|
118
|
+
velvet_result = Bio::Velvet::Runner.new.velvet(
|
119
|
+
options[:velvet_kmer_size],
|
120
|
+
"-short #{tempfile.path} -short2 -fastq.gz #{options[:reads_file]}",
|
121
|
+
options[:velvetg_arguments],
|
122
|
+
:output_assembly_path => options[:output_assembly_path]
|
123
|
+
)
|
124
|
+
if log.debug?
|
125
|
+
log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
|
126
|
+
log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
|
127
|
+
log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
|
128
|
+
log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
|
129
|
+
end
|
130
|
+
log.info "Finished running assembly"
|
131
|
+
end
|
132
|
+
else
|
133
|
+
log.info "Using previous assembly stored at #{options[:previous_assembly]}"
|
134
|
+
velvet_result = Bio::Velvet::Result.new
|
135
|
+
velvet_result.result_directory = options[:previous_assembly]
|
136
|
+
end
|
137
|
+
|
138
|
+
require 'ruby-prof'
|
139
|
+
RubyProf.start
|
140
|
+
|
141
|
+
log.info "Parsing the graph output from velvet"
|
142
|
+
graph = Bio::Velvet::Graph.parse_from_file(File.join velvet_result.result_directory, 'Graph2')
|
143
|
+
log.info "Finished parsing graph: found #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"
|
144
|
+
|
145
|
+
result = RubyProf.stop
|
146
|
+
printer = RubyProf::FlatPrinter.new(result)
|
147
|
+
printer.print(STDOUT)
|
148
|
+
|
149
|
+
if options[:serialize_parsed_graph_file]
|
150
|
+
log.info "Storing a binary version of the graph file for later use at #{options[:serialize_parsed_graph_file]}"
|
151
|
+
File.open(options[:serialize_parsed_graph_file],'wb') do |f|
|
152
|
+
f.print Marshal.dump(graph)
|
153
|
+
end
|
154
|
+
log.info "Stored a binary representation of the velvet graph at #{options[:serialize_parsed_graph_file]}"
|
155
|
+
end
|
156
|
+
else
|
157
|
+
log.info "Restoring graph file from #{options[:previously_serialized_parsed_graph_file]}.."
|
158
|
+
# NOTE: Marshal.load can instantiate arbitrary objects - only restore graph files written by this script.
# Block form closes the file; 'rb' mirrors the 'wb' mode used when the graph was serialised.
graph = File.open(options[:previously_serialized_parsed_graph_file], 'rb') { |f| Marshal.load(f) }
|
159
|
+
log.info "Restoration complete"
|
160
|
+
end
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
# Find the anchoring nodes for each of the contig ends
|
165
|
+
finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
|
166
|
+
log.info "Finding node representing the end of the each contig"
|
167
|
+
i = 1
|
168
|
+
anchor_sequence_ids = contig_ends.collect{|c| c.velvet_sequence_id}
|
169
|
+
anchoring_nodes_and_directions = finder.find_unique_nodes_with_sequence_ids(graph, anchor_sequence_ids)
|
170
|
+
num_anchors_found = anchoring_nodes_and_directions.reject{|s,e| e[0].nil?}.length
|
171
|
+
anchoring_node_id_to_contig_end = {}
|
172
|
+
anchoring_nodes_and_directions.each do |seq_id, node_and_direction|
|
173
|
+
next if node_and_direction[0].nil? #skip when there is no node found in the graph for this contig end
|
174
|
+
anchoring_node_id_to_contig_end[node_and_direction[0].node_id] = velvet_sequence_id_to_contig_end[seq_id]
|
175
|
+
end
|
176
|
+
log.info "Found anchoring nodes for #{num_anchors_found} out of #{contig_ends.length} contig ends"
|
177
|
+
|
178
|
+
log.info "Searching for trails between the nodes within the assembly graph"
|
179
|
+
cartographer = Bio::AssemblyGraphAlgorithms::AcyclicConnectionFinder.new
|
180
|
+
trail_sets = cartographer.find_trails_between_node_set(graph, anchoring_nodes_and_directions.values.reject{|v| v[0].nil?}, options[:graph_search_leash_length])
|
181
|
+
log.info "Found #{trail_sets.reduce(0){|s,set|s+=set.length}} trail(s) in total"
|
182
|
+
|
183
|
+
node_id_to_contig_description = {}
|
184
|
+
anchoring_nodes_and_directions.each do |seq_id, pair|
|
185
|
+
next if pair[0].nil? #When no node was found (covers both an empty pair and a pair whose node is nil)
|
186
|
+
node_id = pair[0].node_id
|
187
|
+
node_id_to_contig_description[node_id] = velvet_sequence_id_to_contig_end[seq_id]
|
188
|
+
end
|
189
|
+
contig_end_id_to_partners = {}
|
190
|
+
# Tabulate all the partners each way (complete the previously triangular matrix)
|
191
|
+
trail_sets.each do |trail_set|
|
192
|
+
trail_set.each do |trail|
|
193
|
+
start_id = trail.first.node.node_id
|
194
|
+
end_id = trail.last.node.node_id
|
195
|
+
contig_end_id_to_partners[start_id] ||= []
|
196
|
+
contig_end_id_to_partners[start_id].push node_id_to_contig_description[end_id]
|
197
|
+
contig_end_id_to_partners[end_id] ||= []
|
198
|
+
contig_end_id_to_partners[end_id].push node_id_to_contig_description[start_id]
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
puts %w(contig_end_id contig_name contig_length connections).join "\t"
|
203
|
+
trail_sets.each_with_index do |trail_set, i|
|
204
|
+
partner_contig_ends = contig_end_id_to_partners[contig_ends[i].velvet_sequence_id]
|
205
|
+
partner_contig_ends ||= []
|
206
|
+
# Each contig has 2 trail sets associated with it - one for the start and one for the end
|
207
|
+
puts [
|
208
|
+
contig_ends[i].velvet_sequence_id,
|
209
|
+
contig_ends[i].contig_name,
|
210
|
+
contig_lengths[contig_ends[i].contig_name],
|
211
|
+
partner_contig_ends.collect{|c| c.velvet_sequence_id}.sort.join(',')
|
212
|
+
].join("\t")
|
213
|
+
end
|
214
|
+
|
215
|
+
if options[:overall_trail_output_fasta_file]
|
216
|
+
File.open(options[:overall_trail_output_fasta_file],'w') do |outfile|
|
217
|
+
trail_sets.each do |trail_set|
|
218
|
+
trail_set.each do |trail|
|
219
|
+
begin
|
220
|
+
trail_sequence = trail.sequence #Get the trail sequence first as this may not be possible.
|
221
|
+
|
222
|
+
start_id = trail.first.node.node_id
|
223
|
+
end_id = trail.last.node.node_id
|
224
|
+
start_contig_end = anchoring_node_id_to_contig_end[start_id]
|
225
|
+
end_contig_end = anchoring_node_id_to_contig_end[end_id]
|
226
|
+
outfile.print '>'
|
227
|
+
outfile.print start_contig_end.contig_name
|
228
|
+
outfile.print '_'
|
229
|
+
outfile.print start_contig_end.start_or_end
|
230
|
+
outfile.print ':'
|
231
|
+
outfile.print end_contig_end.contig_name
|
232
|
+
outfile.print '_'
|
233
|
+
outfile.puts end_contig_end.start_or_end
|
234
|
+
|
235
|
+
outfile.puts trail_sequence
|
236
|
+
rescue Bio::Velvet::NotImplementedException => e
|
237
|
+
log.warn "Problem getting sequence of found trail #{trail.to_s}, skipping this trail: #{e.to_s}"
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
|