finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'optparse'
|
|
4
|
+
require 'bio-logger'
|
|
5
|
+
require 'systemu'
|
|
6
|
+
|
|
7
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
|
|
8
|
+
|
|
9
|
+
# Parse command line options into the options hash
|
|
10
|
+
options = {
|
|
11
|
+
:logger => 'stderr',
|
|
12
|
+
:log_level => 'info',
|
|
13
|
+
}
|
|
14
|
+
o = OptionParser.new do |opts|
|
|
15
|
+
# The usage line names the two options this parser actually defines and
# requires (--query and --blastdb).
opts.banner = "
Usage: #{SCRIPT_NAME} --query <contigs.fasta> --blastdb <blast_db_basename>

Takes a set of contigs, and an assembly. Works out if there are any contigs where there is a blast hit spanning of the contigs using two of the assembly's contig ends.\n\n"
|
|
19
|
+
|
|
20
|
+
opts.on("--query FASTA_FILE", "new contigs fasta file [Required]") do |arg|
|
|
21
|
+
options[:query_file] = arg
|
|
22
|
+
end
|
|
23
|
+
opts.on("--blastdb FASTA_FILE_FORMATTED", "basename of makeblastdb output [Required]") do |arg|
|
|
24
|
+
options[:blastdb] = arg
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# logger options
|
|
28
|
+
opts.separator "\nVerbosity:\n\n"
|
|
29
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
|
30
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
|
31
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
|
32
|
+
end; o.parse!
|
|
33
|
+
if ARGV.length != 0 or options[:query_file].nil? or options[:blastdb].nil?
|
|
34
|
+
$stderr.puts o
|
|
35
|
+
exit 1
|
|
36
|
+
end
|
|
37
|
+
# Setup logging
|
|
38
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Read in the blast file
|
|
42
|
+
blast_results = []
|
|
43
|
+
# One row of BLAST tabular output (the 12 standard -outfmt 6 columns followed
# by qlen and slen), plus predicates that say whether the alignment reaches
# the start or the end of the query / subject sequence.
class BlastResult
  # Alignments shorter than this never count as touching a sequence end.
  MIN_HIT_LENGTH = 100

  attr_accessor :qseqid, :sseqid, :pident, :length, :mismatch, :gapopen, :qstart, :qend, :sstart, :subject_end, :evalue, :bitscore, :query_length, :subject_length

  # Number of bases in from a sequence end within which an alignment may stop
  # and still be considered to reach that end.
  attr_accessor :cutoff_inwards

  def initialize
    @cutoff_inwards = 500
  end

  # True when the alignment reaches to within cutoff_inwards of the last
  # subject base. Minus-strand blastn hits are reported with sstart > send,
  # so the larger of the two coordinates is the one compared.
  def hits_end_of_subject?
    [@sstart, @subject_end].max >= @subject_length - @cutoff_inwards && long_enough?
  end

  # True when the alignment begins within cutoff_inwards of the first subject
  # base, whichever strand the hit is on.
  def hits_start_of_subject?
    [@sstart, @subject_end].min <= @cutoff_inwards && long_enough?
  end

  # True when the alignment reaches to within cutoff_inwards of the last
  # query base.
  def hits_end_of_query?
    [@qstart, @qend].max >= @query_length - @cutoff_inwards && long_enough?
  end

  # True when the alignment begins within cutoff_inwards of the first query
  # base.
  def hits_start_of_query?
    [@qstart, @qend].min <= @cutoff_inwards && long_enough?
  end

  private

  # Very short alignments are ignored by all of the end predicates.
  def long_enough?
    @length >= MIN_HIT_LENGTH
  end
end
|
|
68
|
+
|
|
69
|
+
status, blast_output, stderr = systemu "blastn -query #{options[:query_file].inspect} -db #{options[:blastdb].inspect} -outfmt '6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen slen' -evalue 1e-5"
|
|
70
|
+
raise stderr unless stderr==""
|
|
71
|
+
raise "bad status running blast" unless status.exitstatus == 0
|
|
72
|
+
log.debug "Finished running blast, presumably successfully"
|
|
73
|
+
|
|
74
|
+
blast_output.each_line do |line|
|
|
75
|
+
res = BlastResult.new
|
|
76
|
+
row = line.chomp.split "\t"
|
|
77
|
+
[:qseqid, :sseqid, :pident, :length, :mismatch, :gapopen, :qstart,
|
|
78
|
+
:qend, :sstart, :subject_end, :evalue, :bitscore,
|
|
79
|
+
:query_length, :subject_length].each_with_index do |attr, i|
|
|
80
|
+
res.send "#{attr}=".to_sym, row[i]
|
|
81
|
+
end
|
|
82
|
+
[:length, :mismatch, :gapopen, :qstart,
|
|
83
|
+
:qend, :sstart, :subject_end,:query_length, :subject_length].each do |attr|
|
|
84
|
+
res.send "#{attr}=".to_sym, res.send(attr).to_i
|
|
85
|
+
end
|
|
86
|
+
[:pident, :evalue, :bitscore].each do |attr|
|
|
87
|
+
res.send "#{attr}=".to_sym, res.send(attr).to_f
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
blast_results.push res
|
|
91
|
+
end
|
|
92
|
+
log.info "Parsed #{blast_results.length} blast results e.g. #{blast_results[0].inspect}"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
query_to_blast_results = {}
|
|
96
|
+
hit_to_blast_results = {}
|
|
97
|
+
blast_results.each do |result|
|
|
98
|
+
query_to_blast_results[result.qseqid] ||= []
|
|
99
|
+
query_to_blast_results[result.qseqid].push result
|
|
100
|
+
|
|
101
|
+
hit_to_blast_results[result.sseqid] ||= []
|
|
102
|
+
hit_to_blast_results[result.sseqid].push result
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
# For each query sequence, does it map to the ends of both contigs
|
|
106
|
+
header = %w(query subject1 subject2 qstart1? qend1? sstart1? send1? qstart2? qend2? sstart2? send2?).join("\t")
|
|
107
|
+
query_to_blast_results.each do |query_id, hits|
|
|
108
|
+
query_length = hits[0].query_length
|
|
109
|
+
keepers = []
|
|
110
|
+
|
|
111
|
+
hits.each do |hit|
|
|
112
|
+
# perfect if it hits the start or the end (but not both) of both the query and the subject, unless it is circular
|
|
113
|
+
if hit.hits_start_of_query? ^ hit.hits_end_of_query? and
|
|
114
|
+
hit.hits_start_of_subject? ^ hit.hits_end_of_subject?
|
|
115
|
+
keepers.push hit
|
|
116
|
+
elsif hit.hits_start_of_query? or hit.hits_end_of_query? or
|
|
117
|
+
hit.hits_start_of_subject? or hit.hits_end_of_subject?
|
|
118
|
+
log.info "There's a half-correct hit for #{query_id}: qstart? #{hit.hits_start_of_query?} qend #{hit.hits_end_of_query?} "+
|
|
119
|
+
"sstart #{hit.hits_start_of_subject?} send #{hit.hits_end_of_subject?}, to subject sequence #{hit.sseqid}"
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
if keepers.empty?
|
|
124
|
+
log.debug "no latchings found for #{query_id}"
|
|
125
|
+
elsif keepers.length == 1
|
|
126
|
+
log.info "Query #{query_id} only latches on to a single end, maybe manually inspect"
|
|
127
|
+
elsif keepers.length == 2
|
|
128
|
+
log.debug "Query #{query_id} has 2 keepers!"
|
|
129
|
+
q = keepers.collect{|hit| hit.hits_start_of_query?}.join
|
|
130
|
+
s = keepers.collect{|hit| hit.hits_start_of_subject?}.join
|
|
131
|
+
if (q == 'truefalse' or q == 'falsetrue') and
|
|
132
|
+
(s == 'truefalse' or s == 'falsetrue')
|
|
133
|
+
outs = (0..1).collect{|i|
|
|
134
|
+
[
|
|
135
|
+
keepers[i].hits_start_of_query?,
|
|
136
|
+
keepers[i].hits_end_of_query?,
|
|
137
|
+
keepers[i].hits_start_of_subject?,
|
|
138
|
+
keepers[i].hits_end_of_subject?,
|
|
139
|
+
]
|
|
140
|
+
}.flatten
|
|
141
|
+
unless header.nil?
|
|
142
|
+
puts header
|
|
143
|
+
header = nil
|
|
144
|
+
end
|
|
145
|
+
puts [query_id, keepers[0].sseqid, keepers[1].sseqid, outs].flatten.join("\t")
|
|
146
|
+
else
|
|
147
|
+
log.info "Query #{query_id} has 2 keepers, but they are fighting it seems"
|
|
148
|
+
end
|
|
149
|
+
else
|
|
150
|
+
log.info "More than 2 keepers found for #{query_id}, manual inspection likely required"
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
|
data/bin/finishm
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'optparse'
|
|
4
|
+
require 'bio-logger'
|
|
5
|
+
require 'bio-velvet'
|
|
6
|
+
require 'pp'
|
|
7
|
+
|
|
8
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
|
|
9
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
|
10
|
+
require 'priner'
|
|
11
|
+
|
|
12
|
+
# Parse command line options into the options hash
|
|
13
|
+
options = {
|
|
14
|
+
:logger => 'stderr',
|
|
15
|
+
:log_level => 'info',
|
|
16
|
+
}
|
|
17
|
+
global = OptionParser.new do |opts|
|
|
18
|
+
opts.banner = "
|
|
19
|
+
Usage: #{SCRIPT_NAME} <command> [<arguments>]
|
|
20
|
+
|
|
21
|
+
FinishM is a collection of tasks related to assembly and metagenome assembly. Available commands:
|
|
22
|
+
|
|
23
|
+
wander\tTry to connect contigs (experimental)
|
|
24
|
+
gapfill\tFill assembly gaps (N characters) (experimental)
|
|
25
|
+
explore\tWhat happens in the graph beyond the end of my contig(s)? (experimental)
|
|
26
|
+
visualise\tVisualise the DeBruijn graph (experimental)
|
|
27
|
+
|
|
28
|
+
Commands for PCR finishing:
|
|
29
|
+
|
|
30
|
+
primers\tdesign primers for multi-primer multi-lane PCR experimental setup (experimental)
|
|
31
|
+
primers_check\ttest a set of primers for incompatibility (experimental)
|
|
32
|
+
finish\tprocess results from multi-primer multi-lane PCR experimental setup (experimental)
|
|
33
|
+
|
|
34
|
+
Utility modes:
|
|
35
|
+
|
|
36
|
+
sequence\tGiven a defined sequence of nodes, what is the corresponding sequence?
|
|
37
|
+
count_paths\tCount the number of paths through assembly graph
|
|
38
|
+
find_orfs\tFind possible open reading frames in assembly graph
|
|
39
|
+
\n\n"
|
|
40
|
+
end
|
|
41
|
+
global.order!
|
|
42
|
+
|
|
43
|
+
operator = nil
|
|
44
|
+
subcommands = {
|
|
45
|
+
'primers' => lambda {OptionParser.new do |opts|
|
|
46
|
+
operator = Bio::FinishM::Primers.new
|
|
47
|
+
operator.add_options(opts, options)
|
|
48
|
+
end},
|
|
49
|
+
'primers_check' => lambda {OptionParser.new do |opts|
|
|
50
|
+
operator = Bio::FinishM::Primers::Checker.new
|
|
51
|
+
operator.add_options(opts, options)
|
|
52
|
+
end},
|
|
53
|
+
'finish' => lambda {OptionParser.new do |opts|
|
|
54
|
+
operator = Bio::FinishM::Finisher.new
|
|
55
|
+
operator.add_options(opts, options)
|
|
56
|
+
end},
|
|
57
|
+
'gapfill' => lambda {OptionParser.new do |opts|
|
|
58
|
+
operator = Bio::FinishM::GapFiller.new
|
|
59
|
+
operator.add_options(opts, options)
|
|
60
|
+
end},
|
|
61
|
+
'wander' => lambda {OptionParser.new do |opts|
|
|
62
|
+
operator = Bio::FinishM::Wanderer.new
|
|
63
|
+
operator.add_options(opts, options)
|
|
64
|
+
end},
|
|
65
|
+
'fluff' => lambda {OptionParser.new do |opts|
|
|
66
|
+
operator = Bio::FinishM::Fluff.new
|
|
67
|
+
operator.add_options(opts, options)
|
|
68
|
+
end},
|
|
69
|
+
'explore' => lambda {OptionParser.new do |opts|
|
|
70
|
+
operator = Bio::FinishM::Explorer.new
|
|
71
|
+
operator.add_options(opts, options)
|
|
72
|
+
end},
|
|
73
|
+
'assemble' => lambda {OptionParser.new do |opts|
|
|
74
|
+
operator = Bio::FinishM::Assembler.new
|
|
75
|
+
operator.add_options(opts, options)
|
|
76
|
+
end},
|
|
77
|
+
'visualise' => lambda {OptionParser.new do |opts|
|
|
78
|
+
operator = Bio::FinishM::Visualise.new
|
|
79
|
+
operator.add_options(opts, options)
|
|
80
|
+
end},
|
|
81
|
+
'sequence' => lambda {OptionParser.new do |opts|
|
|
82
|
+
operator = Bio::FinishM::Sequence.new
|
|
83
|
+
operator.add_options(opts, options)
|
|
84
|
+
end},
|
|
85
|
+
'roundup' => lambda {OptionParser.new do |opts|
|
|
86
|
+
operator = Bio::FinishM::RoundUp.new
|
|
87
|
+
operator.add_options(opts, options)
|
|
88
|
+
end},
|
|
89
|
+
'count_paths' => lambda {OptionParser.new do |opts|
|
|
90
|
+
operator = Bio::FinishM::PathCounter.new
|
|
91
|
+
operator.add_options(opts, options)
|
|
92
|
+
end},
|
|
93
|
+
'find_orfs' => lambda {OptionParser.new do |opts|
|
|
94
|
+
operator = Bio::FinishM::ORFsFinder.new
|
|
95
|
+
operator.add_options(opts, options)
|
|
96
|
+
end}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
subcommand = nil
|
|
100
|
+
if ARGV[0] and ARGV[0].match(/finishm$/) #if debugging e.g. 'pry finishm wander ..'. But be careful of finishm no arguments
|
|
101
|
+
subcommand = ARGV.shift
|
|
102
|
+
subcommand = ARGV.shift
|
|
103
|
+
else
|
|
104
|
+
# not debugging
|
|
105
|
+
subcommand = ARGV.shift
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
if subcommand.nil?
|
|
109
|
+
$stderr.puts global
|
|
110
|
+
exit 1
|
|
111
|
+
elsif !subcommands[subcommand]
|
|
112
|
+
$stderr.puts "Unrecognized subcommand: #{subcommand}"
|
|
113
|
+
exit 1
|
|
114
|
+
else
|
|
115
|
+
# Add options specific for subcommand
|
|
116
|
+
opts = subcommands[subcommand].call
|
|
117
|
+
|
|
118
|
+
# Add global logging options
|
|
119
|
+
opts.separator "\nVerbosity:\n\n"
|
|
120
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
|
121
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
|
122
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
|
123
|
+
opts.separator "\n"
|
|
124
|
+
opts.parse!
|
|
125
|
+
|
|
126
|
+
# Setup logging
|
|
127
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME); log.outputters.each {|o| o.formatter = Log4r::PatternFormatter.new(:pattern => "%5l %c %d: %m", :date_pattern => '%d/%m %T')}
|
|
128
|
+
Bio::Log::LoggerPlus.new 'bio-velvet'; Bio::Log::CLI.configure 'bio-velvet'
|
|
129
|
+
|
|
130
|
+
log.debug "Running FinishM #{subcommand} with these options: #{PP.pp(options, "").gsub(/\n$/,'')}"
|
|
131
|
+
|
|
132
|
+
error_message = operator.validate_options(options, ARGV)
|
|
133
|
+
if error_message.nil? or error_message == false
|
|
134
|
+
operator.run options, ARGV
|
|
135
|
+
else
|
|
136
|
+
$stderr.puts
|
|
137
|
+
$stderr.puts "ERROR parsing options: #{error_message}"
|
|
138
|
+
$stderr.puts
|
|
139
|
+
$stderr.puts opts
|
|
140
|
+
exit 1
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'optparse'
|
|
4
|
+
require 'bio-logger'
require 'bio-velvet'
|
|
5
|
+
|
|
6
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
|
|
7
|
+
|
|
8
|
+
# Parse command line options into the options hash
|
|
9
|
+
options = {
|
|
10
|
+
:logger => 'stderr',
|
|
11
|
+
:log_level => 'info',
|
|
12
|
+
}
|
|
13
|
+
o = OptionParser.new do |opts|
|
|
14
|
+
opts.banner = "
|
|
15
|
+
Usage: #{SCRIPT_NAME} <arguments>
|
|
16
|
+
|
|
17
|
+
Description of what this program does...\n\n"
|
|
18
|
+
|
|
19
|
+
opts.on("--velvet-pregraph GRAPH_FILE", "PreGraph file output from velveth [required]") do |arg|
|
|
20
|
+
options[:velvet_pregraph_file] = arg
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# logger options
|
|
24
|
+
opts.separator "\nVerbosity:\n\n"
|
|
25
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
|
26
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
|
27
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
|
28
|
+
end; o.parse!
|
|
29
|
+
if ARGV.length != 0 or options[:velvet_pregraph_file].nil?
|
|
30
|
+
$stderr.puts o
|
|
31
|
+
exit 1
|
|
32
|
+
end
|
|
33
|
+
# Setup logging
|
|
34
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Read in the velvet graph
|
|
38
|
+
log.info "Parsing graph from #{options[:velvet_pregraph_file]}"
|
|
39
|
+
graph = Bio::Velvet::Graph.parse_from_file(options[:velvet_pregraph_file])
|
|
40
|
+
log.info "Finished parsing graph, found #{graph.number_of_nodes} nodes"
|
|
41
|
+
|
|
42
|
+
# Log the number of nodes and arcs in the current graph
|
|
43
|
+
|
|
44
|
+
# Read in the fasta file of immutable nodes, and extract the two most immutable
|
|
45
|
+
# Log that they are found
|
|
46
|
+
|
|
47
|
+
# Determine that the graph is connected or not between the two most immutable nodes, using some graph theoretic algorithm
|
|
48
|
+
# If the graph is not connected, then there is no hope, exit
|
|
49
|
+
|
|
50
|
+
# Go through the graph to get a list of the cap nodes
|
|
51
|
+
# Log the number of cap nodes found
|
|
52
|
+
|
|
53
|
+
# Trim off all the cap nodes back to cross nodes, keeping track of the lengths
|
|
54
|
+
|
|
55
|
+
# Print the graph in graphviz dot format
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'optparse'
|
|
4
|
+
require 'bio-logger'
|
|
5
|
+
require 'bio-velvet'
|
|
6
|
+
require 'tempfile'
|
|
7
|
+
require 'pp'
|
|
8
|
+
|
|
9
|
+
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
|
|
10
|
+
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
|
11
|
+
require 'priner'
|
|
12
|
+
|
|
13
|
+
# Parse command line options into the options hash
|
|
14
|
+
options = {
|
|
15
|
+
:logger => 'stderr',
|
|
16
|
+
:log_level => 'info',
|
|
17
|
+
:velvet_kmer_size => 43,#TODO: these options should be exposed to the user, and perhaps not guessed at
|
|
18
|
+
:contig_end_length => 200,
|
|
19
|
+
:output_assembly_path => '/tmp/velvetAssembly',
|
|
20
|
+
:graph_search_leash_length => 3000,
|
|
21
|
+
:assembly_coverage_cutoff => 1.5,
|
|
22
|
+
}
|
|
23
|
+
o = OptionParser.new do |opts|
|
|
24
|
+
opts.banner = "
|
|
25
|
+
Usage: #{SCRIPT_NAME} --reads <read_file> --contig <contig_file>
|
|
26
|
+
|
|
27
|
+
Takes a set of reads and a contig that contains gap characters. Then it tries to fill in
|
|
28
|
+
these N characters. It is possible that there is multiple ways to close the gap - in that case
|
|
29
|
+
each is reported. \n\n"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
opts.on("--reads FILE", "gzipped fastq file of reads to perform the gap closing with [required]") do |arg|
|
|
33
|
+
options[:reads_file] = arg
|
|
34
|
+
end
|
|
35
|
+
opts.on("--contig FILE", "fasta file of single contig containing Ns that are to be closed [required]") do |arg|
|
|
36
|
+
options[:contig_file] = arg
|
|
37
|
+
end
|
|
38
|
+
# Marked [required]: the argument check after parsing prints usage and exits
# when this option is missing, and the trails are always written to this path.
opts.on("--output-trails-fasta PATH", "Output found paths to this file in fasta format [required]") do |arg|
  options[:overall_trail_output_fasta_file] = arg
end
|
|
41
|
+
|
|
42
|
+
opts.separator "\nOptional arguments:\n\n"
|
|
43
|
+
opts.on("--overhang NUM", "Start assembling this far from the gap [default: #{options[:contig_end_length]}]") do |arg|
|
|
44
|
+
options[:contig_end_length] = arg.to_i
|
|
45
|
+
end
|
|
46
|
+
# The offset is given 1-based on the command line and stored 0-based.
opts.on("--start OFFSET", "Start trying to fill from this position in the contig, requires --stop [default: found from position of Ns]") do |arg|
  options[:start_offset] = arg.to_i-1
end
|
|
49
|
+
# The offset is given 1-based on the command line and stored 0-based.
opts.on("--stop OFFSET", "Stop trying to fill at this position in the contig, requires --start [default: found from position of Ns]") do |arg|
  options[:end_offset] = arg.to_i-1
end
|
|
52
|
+
opts.on("--assembly-png PATH", "Output assembly as a PNG file [default: off]") do |arg|
|
|
53
|
+
options[:output_graph_png] = arg
|
|
54
|
+
end
|
|
55
|
+
opts.on("--assembly-svg PATH", "Output assembly as an SVG file [default: off]") do |arg|
|
|
56
|
+
options[:output_graph_svg] = arg
|
|
57
|
+
end
|
|
58
|
+
opts.on("--assembly-dot PATH", "Output assembly as an DOT file [default: off]") do |arg|
|
|
59
|
+
options[:output_graph_dot] = arg
|
|
60
|
+
end
|
|
61
|
+
opts.on("--velvet-kmer KMER", "kmer size to use with velvet [default: #{options[:velvet_kmer_size]}]") do |arg|
|
|
62
|
+
options[:velvet_kmer_size] = arg.to_i
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
opts.separator "\nDebug-related options:\n\n"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# logger options
|
|
70
|
+
opts.separator "\nVerbosity:\n\n"
|
|
71
|
+
opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
|
|
72
|
+
opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
|
|
73
|
+
opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
|
|
74
|
+
end; o.parse!
|
|
75
|
+
if ARGV.length != 0 or options[:reads_file].nil? or options[:contig_file].nil? or options[:overall_trail_output_fasta_file].nil?
|
|
76
|
+
$stderr.puts o
|
|
77
|
+
exit 1
|
|
78
|
+
end
|
|
79
|
+
# Setup logging
|
|
80
|
+
Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
|
|
81
|
+
Bio::Log::LoggerPlus.new 'bio-velvet'; Bio::Log::CLI.configure 'bio-velvet'
|
|
82
|
+
log.outputters[0].formatter = Log4r::PatternFormatter.new(:pattern => "%5l %c %d: %m", :date_pattern => '%d/%m %T')
|
|
83
|
+
|
|
84
|
+
log.debug "Running finishm with options: #{PP.pp(options, "").gsub(/\n$/,'')}" if log.debug?
|
|
85
|
+
|
|
86
|
+
# Find where the Ns are
|
|
87
|
+
n_region_start = nil
|
|
88
|
+
n_region_end = nil
|
|
89
|
+
sequence = nil
|
|
90
|
+
Bio::FlatFile.foreach(options[:contig_file]) do |seq|
|
|
91
|
+
if sequence
|
|
92
|
+
raise Exception, "Sorry, this script can only handle single sequences to be gap filled at the moment"
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
sequence = seq.seq
|
|
96
|
+
|
|
97
|
+
if options[:start_offset] and options[:end_offset]
|
|
98
|
+
log.info "Trying to gap fill from #{options[:start_offset]+1} to #{options[:end_offset]+1}"
|
|
99
|
+
n_region_start = options[:start_offset]
|
|
100
|
+
n_region_end = options[:end_offset]
|
|
101
|
+
else
|
|
102
|
+
log.info "Determining where to fill from the presence of Ns"
|
|
103
|
+
|
|
104
|
+
matches = sequence.match(/(N+)/i)
|
|
105
|
+
if !matches
|
|
106
|
+
raise "Unable to find any gaps in the input sequence. That was a bit too easy.."
|
|
107
|
+
end
|
|
108
|
+
n_region_start = matches.offset(0)[0]
|
|
109
|
+
n_region_end = n_region_start + matches[1].length
|
|
110
|
+
log.info "Detected a gap between #{n_region_start} and #{n_region_end}"
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Check to make sure we are sufficiently distant from the ends
|
|
114
|
+
if n_region_start < options[:contig_end_length] or
|
|
115
|
+
sequence.length - n_region_end < options[:contig_end_length]
|
|
116
|
+
raise "The gap is too close to the end of the contig, sorry"
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
# Do the assembly
|
|
121
|
+
graph = nil
|
|
122
|
+
if options[:previously_serialized_parsed_graph_file].nil?
|
|
123
|
+
velvet_result = nil
|
|
124
|
+
if options[:previous_assembly].nil? #If assembly has not already been carried out
|
|
125
|
+
Tempfile.open('anchors.fa') do |tempfile|
|
|
126
|
+
tempfile.puts ">anchor1"
|
|
127
|
+
# Clamp the slice start at 0: when the gap begins exactly contig_end_length
# bases into the contig the start index would be -1, which Ruby counts from
# the end of the string and so yields an empty anchor.
tempfile.puts sequence[[n_region_start-options[:contig_end_length]-1, 0].max...n_region_start]
|
|
128
|
+
tempfile.puts ">anchor2"
|
|
129
|
+
#Have to be in reverse, because the node finder finds the node at the start of the read, not the end
|
|
130
|
+
fwd2 = Bio::Sequence::NA.new(sequence[n_region_end..(n_region_end+options[:contig_end_length])])
|
|
131
|
+
tempfile.puts fwd2.reverse_complement.to_s
|
|
132
|
+
tempfile.close
|
|
133
|
+
log.debug "Inputting anchors into the assembly: #{File.open(tempfile.path).read}" if log.debug?
|
|
134
|
+
|
|
135
|
+
log.info "Assembling sampled reads with velvet"
|
|
136
|
+
# Bit of a hack, but the anchors have to be passed as the first read category (-short) because then the start and end anchors get sequence IDs 1 and 2, which is how their nodes are looked up in the graph further down.
|
|
137
|
+
velvet_result = Bio::Velvet::Runner.new.velvet(
|
|
138
|
+
options[:velvet_kmer_size],
|
|
139
|
+
"-short #{tempfile.path} -short2 -fastq.gz #{options[:reads_file]}",
|
|
140
|
+
"-read_trkg yes -cov_cutoff #{options[:assembly_coverage_cutoff]}",
|
|
141
|
+
:output_assembly_path => options[:output_assembly_path]
|
|
142
|
+
)
|
|
143
|
+
if log.debug?
|
|
144
|
+
log.debug "velveth stdout: #{velvet_result.velveth_stdout}"
|
|
145
|
+
log.debug "velveth stderr: #{velvet_result.velveth_stderr}"
|
|
146
|
+
log.debug "velvetg stdout: #{velvet_result.velvetg_stdout}"
|
|
147
|
+
log.debug "velvetg stderr: #{velvet_result.velvetg_stderr}"
|
|
148
|
+
end
|
|
149
|
+
log.info "Finished running assembly"
|
|
150
|
+
end
|
|
151
|
+
else
|
|
152
|
+
log.info "Using previous assembly stored at #{options[:previous_assembly]}"
|
|
153
|
+
velvet_result = Bio::Velvet::Result.new
|
|
154
|
+
velvet_result.result_directory = options[:previous_assembly]
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
log.info "Parsing the graph output from velvet"
|
|
158
|
+
graph = Bio::Velvet::Graph.parse_from_file(File.join velvet_result.result_directory, 'LastGraph')
|
|
159
|
+
log.info "Finished parsing graph: found #{graph.nodes.length} nodes and #{graph.arcs.length} arcs"
|
|
160
|
+
|
|
161
|
+
if options[:serialize_parsed_graph_file]
|
|
162
|
+
log.info "Storing a binary version of the graph file for later use at #{options[:serialize_parsed_graph_file]}"
|
|
163
|
+
File.open(options[:serialize_parsed_graph_file],'wb') do |f|
|
|
164
|
+
f.print Marshal.dump(graph)
|
|
165
|
+
end
|
|
166
|
+
log.info "Stored a binary representation of the velvet graph at #{options[:serialize_parsed_graph_file]}"
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
if options[:assembly_coverage_cutoff]
|
|
170
|
+
log.info "Removing low-coverage nodes from the graph (less than #{options[:assembly_coverage_cutoff]})"
|
|
171
|
+
cutoffer = Bio::AssemblyGraphAlgorithms::CoverageBasedGraphFilter.new
|
|
172
|
+
deleted_nodes, deleted_arcs = cutoffer.remove_low_coverage_nodes(graph, options[:assembly_coverage_cutoff], :whitelisted_sequences => [1,2])
|
|
173
|
+
|
|
174
|
+
log.info "Removed #{deleted_nodes.length} nodes and #{deleted_arcs.length} arcs from the graph due to low coverage"
|
|
175
|
+
log.info "Now there is #{graph.nodes.length} nodes and #{graph.arcs.length} arcs remaining"
|
|
176
|
+
end
|
|
177
|
+
else
|
|
178
|
+
log.info "Restoring graph file from #{options[:previously_serialized_parsed_graph_file]}.."
|
|
179
|
+
# Read in binary mode (the dump is written with 'wb') and use the block form
# so the file handle is closed once the graph has been loaded.
# NOTE(review): Marshal.load can instantiate arbitrary objects - only restore
# graph files that were produced by this script.
graph = File.open(options[:previously_serialized_parsed_graph_file], 'rb') { |f| Marshal.load(f) }
|
|
180
|
+
log.info "Restoration complete"
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# Find the anchor nodes again
|
|
185
|
+
finder = Bio::AssemblyGraphAlgorithms::NodeFinder.new
|
|
186
|
+
log.info "Finding node representing the end of the each contig"
|
|
187
|
+
i = 1
|
|
188
|
+
anchor_sequence_ids = [1,2]
|
|
189
|
+
start_node, start_node_forward = finder.find_unique_node_with_sequence_id(graph, 1)
|
|
190
|
+
end_node, end_node_forward = finder.find_unique_node_with_sequence_id(graph, 2)
|
|
191
|
+
if start_node and end_node
|
|
192
|
+
log.info "Found both anchoring nodes in the graph: #{start_node.node_id}/#{start_node_forward} and #{end_node.node_id}/#{end_node_forward}"
|
|
193
|
+
else
|
|
194
|
+
log.error "start node not found" if start_node.nil?
|
|
195
|
+
log.error "end node not found" if end_node.nil?
|
|
196
|
+
raise "Unable to find both anchor reads from the assembly, cannot continue. This is probably an error with this script, not you."
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
log.info "Removing nodes unconnected to either the start or the end from the graph.."
|
|
200
|
+
original_num_nodes = graph.nodes.length
|
|
201
|
+
original_num_arcs = graph.arcs.length
|
|
202
|
+
filter = Bio::AssemblyGraphAlgorithms::ConnectivityBasedGraphFilter.new
|
|
203
|
+
filter.remove_unconnected_nodes(graph, [start_node, end_node])
|
|
204
|
+
log.info "Removed #{original_num_nodes-graph.nodes.length} nodes and #{original_num_arcs-graph.arcs.length} arcs"
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
if options[:output_graph_png]
|
|
208
|
+
log.info "Converting assembly to a graphviz PNG"
|
|
209
|
+
viser = Bio::Assembly::ABVisualiser.new
|
|
210
|
+
gv = viser.graphviz(graph, {:start_node_id => start_node.node_id, :end_node_id => end_node.node_id})
|
|
211
|
+
gv.output :png => options[:output_graph_png], :use => :neato
|
|
212
|
+
end
|
|
213
|
+
if options[:output_graph_svg]
|
|
214
|
+
log.info "Converting assembly to a graphviz SVG"
|
|
215
|
+
viser = Bio::Assembly::ABVisualiser.new
|
|
216
|
+
gv = viser.graphviz(graph, {:start_node_id => start_node.node_id, :end_node_id => end_node.node_id})
|
|
217
|
+
gv.output :svg => options[:output_graph_svg], :use => :neato
|
|
218
|
+
end
|
|
219
|
+
if options[:output_graph_dot]
|
|
220
|
+
log.info "Converting assembly to a graphviz DOT"
|
|
221
|
+
viser = Bio::Assembly::ABVisualiser.new
|
|
222
|
+
gv = viser.graphviz(graph, {:start_node_id => start_node.node_id, :end_node_id => end_node.node_id, :digraph => false})
|
|
223
|
+
gv.output :dot => options[:output_graph_dot]
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
log.info "Searching for trails between the nodes within the assembly graph"
|
|
229
|
+
cartographer = Bio::AssemblyGraphAlgorithms::AcyclicConnectionFinder.new
|
|
230
|
+
trails = cartographer.find_trails_between_nodes(graph, start_node, end_node, options[:graph_search_leash_length], start_node_forward)
|
|
231
|
+
log.info "Found #{trails.length} trail(s) in total"
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
log.debug "Outputing trail sequences"
|
|
235
|
+
File.open(options[:overall_trail_output_fasta_file],'w') do |f|
|
|
236
|
+
trails.each_with_index do |trail, i|
|
|
237
|
+
f.puts ">trail#{i+1}"
|
|
238
|
+
f.puts trail.sequence
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
|