finishm 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'ffi'
|
2
|
+
|
3
|
+
# FFI bindings exposing C functions from the libfinishm shared library to Ruby.
class Bio::FinishM::VelvetCBinding
  extend FFI::Library
  include Bio::FinishM::Logging

  # The shared object is located relative to this file, under ../external.
  shared_object_path = File.join(File.dirname(__FILE__), '..', 'external', 'libfinishm.so.1.0')
  ffi_lib(shared_object_path)

  # ---------------------------- ProbeNodeFinding ----------------------------

  # C prototype:
  #   IDnum* extract_best_probe_reads(Graph* graph, IDnum* probeReadIDs, IDnum numProbeReads);
  attach_function(:extract_best_probe_reads, [:pointer, :pointer, :int32], :pointer)

  # ------------------------------- ReadToNode -------------------------------

  # Mirror of the C struct ReadIdNodeId:
  #   typedef struct {
  #     IDnum read_id;
  #     IDnum node_id;
  #   } ReadIdNodeId;
  # According to the C-side note, this structure serves two purposes: it is
  # used when the lookup data structure is created, and again in the result
  # of the indexation.
  class ReadIdNodeIdStruct < FFI::Struct
    layout(:read_id, :int32, :node_id, :int32)
  end

  # Mirror of the C struct ReadIdToNodeIdLookupTable:
  #   typedef struct {
  #     IDnum num_contents;
  #     IDnum num_reads;
  #     IDnum* index;
  #     ReadIdNodeId* contents;
  #   } ReadIdToNodeIdLookupTable;
  class ReadIdToNodeIdLookupTableStruct < FFI::Struct
    layout(
      :num_contents, :int32,
      :num_reads, :int32,
      :index, :pointer,
      :contents, :pointer
    )
  end

  # Mirror of the C struct ReadIdToNodeIdIndexation:
  #   typedef struct {
  #     IDnum num_nodes;
  #     ReadIdNodeId* read_ids_node_ids;
  #   } ReadIdToNodeIdIndexation;
  class ReadIdToNodeIdIndexationStruct < FFI::Struct
    layout(:num_nodes, :int32, :read_ids_node_ids, ReadIdNodeIdStruct.ptr)
  end

  # C prototype:
  #   ReadIdToNodeIdIndexation getReadIdToNodeIdIndexation(ReadIdToNodeIdLookupTable* lookupTable, IDnum readID);
  # The struct is returned by value rather than through a pointer.
  attach_function(
    :getReadIdToNodeIdIndexation,
    [:pointer, :int32],
    ReadIdToNodeIdIndexationStruct.by_value
  )
  # snake_case name for the same function
  alias get_read_id_to_node_id_indexation getReadIdToNodeIdIndexation

  # C prototype:
  #   ReadIdToNodeIdLookupTable* readReadIdToNodeIdLookupTable(char* fileName);
  attach_function(:readReadIdToNodeIdLookupTable, [:string], :pointer)
  # snake_case name for the same function
  alias read_read_id_to_node_id_lookup_table readReadIdToNodeIdLookupTable
end
|
53
|
+
|
54
|
+
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'bio-velvet'
|
2
|
+
require 'bio'
|
3
|
+
|
4
|
+
module Bio
|
5
|
+
module AssemblyGraphAlgorithms
|
6
|
+
# Raised when a trail of nodes cannot be turned into a sequence unambiguously.
# Derives from StandardError (rather than Exception) so that an ordinary
# `rescue` / `rescue => e` in calling code is able to catch it.
class GraphWalkingException < StandardError; end
|
7
|
+
|
8
|
+
# A class to extract the sequence of a trail of nodes, in a very lazy, give up easily,
|
9
|
+
# kind of way, whenever things get too complicated. This class will throw its hands in
|
10
|
+
# the air (by raising a GraphWalkingException) when both ends of the one node connect
|
11
|
+
# to an adjacent node on the trail.
|
12
|
+
#
|
13
|
+
# Hopefully this method should be sufficient for most cases. It might be improved by
|
14
|
+
# using a fancier algorithm that considers more than two nodes at a time.
|
15
|
+
class LazyGraphWalker
|
16
|
+
# Given a list of nodes, one that yields Node objects when the #each method is called,
|
17
|
+
# Return the sequence (as a plain string) of the nodes concatenated together,
|
18
|
+
# being cognizant of the directionality of the arcs between the nodes.
|
19
|
+
#
|
20
|
+
# This may not be straightforward, particularly in the presence of palindromic
|
21
|
+
# nodes. If any difficulties are encountered, a GraphWalkingException is thrown.
|
22
|
+
def trail_sequence(velvet_graph, ordered_collection_of_nodes)
  # The trail is examined two adjacent nodes at a time. The single arc joining
  # them decides (a) for the first pair only, whether the first node is emitted
  # forward or reverse-complemented, and (b) which bases the second node of the
  # pair contributes. An empty trail gives '', a single node gives that node's
  # sequence as-is.
  logger = Bio::Log::LoggerPlus['finishm']
  # Messages are built inside the block, so nothing is interpolated unless debug logging is on
  trace = lambda do |&message|
    logger.debug(message.call) if logger and logger.debug?
  end

  # How to emit the very first node, checked in this order:
  # [arc predicate, orientation of the first node, resulting used-up end, debug message]
  first_node_rules = [
    [:connects_end_to_beginning?,       :forward, :start, "Adding end to beginning"],
    [:connects_end_to_end?,             :forward, :start, "Adding end to end"],
    [:connects_beginning_to_beginning?, :reverse, :end,   "Adding beginning to beginning"],
    [:connects_beginning_to_end?,       :reverse, :end,   "Adding beginning to end"],
  ]
  # How to append each later node, checked in this order:
  # [arc predicate, used-up end required, node method supplying the bases, new used-up end, debug message]
  extension_rules = [
    [:connects_end_to_beginning?,       :start, :ends_of_kmers_of_node,      :start, "Adding end to beginning"],
    [:connects_end_to_end?,             :start, :ends_of_kmers_of_twin_node, :end,   "Adding end to end"],
    [:connects_beginning_to_beginning?, :end,   :ends_of_kmers_of_node,      :start, "Adding beginning to beginning"],
    [:connects_beginning_to_end?,       :end,   :ends_of_kmers_of_twin_node, :end,   "Adding beginning to end"],
  ]

  seq = ''
  state = :first
  previous = nil
  used_up_end = nil

  ordered_collection_of_nodes.each do |node|
    if previous.nil?
      # First node in the trail: its orientation is unknown until the next node is seen
      previous = node
      state = :second
      next
    end

    # Two nodes are now in the frame. There must be exactly one arc between them.
    arcs = velvet_graph.get_arcs_by_node(previous, node)
    if arcs.empty?
      raise GraphWalkingException, "Attempted to find trail between two unconnected nodes: #{previous.inspect}, #{node.inspect}"
    end
    if arcs.length > 1
      raise GraphWalkingException, "Two adjacent nodes in the graph are (at least) doubly connected too each other, LazyGraphWalker is throwing its hands in the air. Nodes are (in the same order as the specified trail) #{previous.inspect}, #{node.inspect}"
    end
    arc = arcs[0]
    arc_matches = lambda do |predicate|
      arc.public_send(predicate, previous.node_id, node.node_id)
    end

    # When handling the second node, the first node's sequence has to be added too
    if state == :second
      state = :after_second
      trace.call { "Adding start node from arc #{arc}, first_node_id=#{previous.node_id}, second_node_id=#{node.node_id}" }
      first_rule = first_node_rules.find { |predicate, _, _, _| arc_matches.call(predicate) }
      raise "Programming error" if first_rule.nil?

      _, orientation, consumed_end, first_message = first_rule
      trace.call { first_message }
      seq += (orientation == :forward ? previous.sequence : revcom(previous.sequence))
      used_up_end = consumed_end
    end
    trace.call { "At crossroads, with node_id=#{node.node_id}, last_node_used_up_end=#{used_up_end}, arc=#{arc}" }

    raise "Programming error in state machine" unless state == :after_second

    # Add the new node's sequence: the arc type must agree with the direction walked so far
    rule = extension_rules.find do |predicate, required_end, _, _, _|
      arc_matches.call(predicate) and used_up_end == required_end
    end
    if rule.nil?
      raise GraphWalkingException, "The trail being followed to create the trail sequence isn't continuous in a consistent direction. Failed at node #{node.node_id}. last_node_used_up_end=#{used_up_end}, arc=#{arc}"
    end
    _, _, bases_method, new_end, message = rule
    trace.call { message }
    seq += node.public_send(bases_method)
    used_up_end = new_end

    previous = node
  end

  # Return nothing when an empty collection is given.
  return '' if previous.nil?

  # When only 1 node is given, return that node's sequence in arbitrary orientation
  state == :second ? previous.sequence : seq
end
|
116
|
+
|
117
|
+
private
|
118
|
+
def revcom(seq)
  # Reverse-complement a nucleotide string via Bio::Sequence::NA, returning upper case text
  nucleotides = Bio::Sequence::NA.new(seq)
  flipped = nucleotides.reverse_complement
  flipped.to_s.upcase
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
1
|
@@ -0,0 +1,224 @@
|
|
1
|
+
class Bio::FinishM::Assembler
|
2
|
+
include Bio::FinishM::Logging
|
3
|
+
|
4
|
+
# Register the command line options of 'finishm assemble' on the given
# OptionParser, and seed the options hash with their defaults.
#
# optparse_object:: the OptionParser to add the banner, separators and switches to
# options:: Hash that is modified in place, first with the defaults and later
#           (when the parser is run) with the parsed values
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm assemble --assemble-from <node_shorthand> --output-contigs <output.fa> <assembly_definition>

Assemble
\n\n"

  options.merge!({
    :output_pathspec => false,
    :progressbar => true,
    :min_contig_size => 500,
    :bubbly => false,
    # Given an explicit default so the --debug help text reads "[default: false]" rather than "[default: ]"
    :debug => false,
    :max_tip_length => Bio::AssemblyGraphAlgorithms::BubblyAssembler::DEFAULT_MAX_TIP_LENGTH,
    :max_bubble_length => Bio::AssemblyGraphAlgorithms::BubblyAssembler::DEFAULT_MAX_BUBBLE_LENGTH,
    :bubble_node_count_limit => Bio::AssemblyGraphAlgorithms::BubblyAssembler::DEFAULT_BUBBLE_NODE_COUNT_LIMIT,
    :min_confirming_recoherence_kmer_reads => Bio::AssemblyGraphAlgorithms::SingleEndedAssembler::DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS,
  })

  optparse_object.separator "\nRequired arguments:\n\n"
  optparse_object.on("--output-contigs PATH", "Output found paths to this file in fasta format [required]") do |arg|
    options[:output_trails_file] = arg
  end
  optparse_object.separator "\nThere must be some definition of reads too:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on("--assemble-from SHORTHAND", "Specify the node and direction to start assembling from e.g. '3s' to start forward from node 3, '4e' to start reverse from node 4 [default: assemble the entire graph]") do |arg|
    # Only a cheap sanity check of the prefix; the shorthand is parsed properly once the graph is available
    unless arg.match(/^\d+[se]+/)
      raise "Unable to parse node shorthand #{arg}"
    end
    options[:initial_node_shorthand] = arg
  end
  optparse_object.on("--recoherence-kmer LENGTH", Integer, "When paths diverge, try to rescue by using a bigger kmer of this length [default: none]") do |arg|
    options[:recoherence_kmer] = arg
  end
  optparse_object.on("--recoherence-min-reads NUM", Integer, "Number of reads required to agree with recoherence [default: #{options[:min_confirming_recoherence_kmer_reads] } (when --recoherence-kmer is specified)]") do |arg|
    options[:min_confirming_recoherence_kmer_reads] = arg
  end
  optparse_object.on("--max-tip-length LENGTH", Integer, "Maximum length of 'tip' in assembly graph to ignore [default: #{options[:max_tip_length] }]") do |arg|
    options[:max_tip_length] = arg
  end
  optparse_object.on("--bubbly", "Assemble with the bubbly method [default: #{options[:bubbly] }]") do
    options[:bubbly] = true
  end
  optparse_object.on("--max-bubble-size NUM", Integer, "Max bubble size available for bubbly method [default: #{options[:max_bubble_length] }]") do |arg|
    options[:max_bubble_length] = arg
  end
  optparse_object.on("--max-bubble-complexity NUM", Integer, "Max number of nodes in a bubble to explore before giving up (0 for infinite) [default: #{options[:bubble_node_count_limit] }]") do |arg|
    # 0 on the command line means "no limit", which is stored as nil
    if arg == 0
      options[:bubble_node_count_limit] = nil
    else
      options[:bubble_node_count_limit] = arg
    end
  end
  optparse_object.on("--output-pathspec", "Give the sequence of nodes used in the path in the output contig file [default: #{options[:output_pathspec] }]") do
    options[:output_pathspec] = true
  end
  optparse_object.on("--output-contig-stats FILE", "Output stats about each contig to this file [default: don't output anything]") do |arg|
    options[:output_stats] = arg
  end
  optparse_object.on("--no-progressbar", "Don't show a progress bar [default: do show one unless --assemble-from is specified]") do
    options[:progressbar] = false
  end
  optparse_object.on("--min-contig-length LENGTH",Integer,"Don't print contigs shorter than this [default: #{options[:min_contig_size] }]") do |arg|
    options[:min_contig_size] = arg
  end
  optparse_object.on("--min-starting-node-coverage COVERAGE",Float,"Only start exploring from nodes with at least this much coverage [default: start from all nodes]") do |arg|
    options[:min_coverage_of_start_nodes] = arg
  end
  optparse_object.on("--min-starting-node-length LENGTH",Integer,"Only start exploring from nodes with at least this length [default: start from all nodes]") do |arg|
    options[:min_length_of_start_nodes] = arg
  end
  optparse_object.on("--max-coverage-at-fork COVERAGE",Float,"When reached a fork, don't take paths with more than this much coverage [default: not applied]") do |arg|
    options[:max_coverage_at_fork] = arg
  end
  optparse_object.on("--badformat FILE", "Output contigs in badformat file") do |arg|
    options[:output_badformat_file] = arg
  end
  optparse_object.on("--debug", "Build the graph, then drop to a pry console. [default: #{options[:debug] }]") do
    options[:debug] = true
  end

  Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
end
|
87
|
+
|
88
|
+
# Check the parsed options of 'finishm assemble'.
#
# Returns a String describing the first problem found, or nil when nothing
# wrong was detected.
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return "Dangling argument(s) found e.g. #{argv[0]}" unless argv.length == 0

  required = [
    :output_trails_file,
  ]
  missing = required.find { |key| options[key].nil? }
  return "No option found to specify #{missing}." if missing

  # Need reads unless there is already an assembly
  return nil if options[:previous_assembly] or options[:previously_serialized_parsed_graph_file]

  Bio::FinishM::ReadInput.new.validate_options(options, [])
end
|
111
|
+
|
112
|
+
# Build the assembly graph and write assembled contigs.
#
# With options[:initial_node_shorthand] set, a single path is assembled from
# that starting point and written (named after the shorthand) to
# options[:output_trails_file]. Otherwise the whole graph is assembled and
# each path is written as contig1, contig2, ...
#
# Optional extra outputs: a tab-separated stats file (options[:output_stats],
# columns name / length / coverage) and a badformat file
# (options[:output_badformat_file]).
#
# options:: Hash of parsed command line options
# argv:: not used by this method
def run(options, argv)
  # Generate the graph
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options

  # Without a recoherence kmer, ask the graph generator not to parse the reads
  graph_options = options
  if options[:recoherence_kmer].nil?
    graph_options = options.merge({
      :dont_parse_reads => true,
      :dont_parse_noded_reads => true,
    })
  end
  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, graph_options)
  graph = finishm_graph.graph

  if options[:initial_node_shorthand]
    # Just check the shorthand is ok before the time consuming task of reading in more data
    Bio::Velvet::Graph::OrientedNodeTrail.create_from_shorthand(options[:initial_node_shorthand], graph)
  end

  # Setup assembler
  assembler = nil
  if options[:bubbly]
    assembler = Bio::AssemblyGraphAlgorithms::BubblyAssembler.new graph
  else
    assembler = Bio::AssemblyGraphAlgorithms::SingleEndedAssembler.new graph
  end
  # Copy the tuning options across verbatim (unset ones are copied as nil)
  [
    :recoherence_kmer,
    :min_confirming_recoherence_kmer_reads,
    :min_contig_size,
    :min_coverage_of_start_nodes,
    :min_length_of_start_nodes,
    :max_tip_length,
    :leash_length,
    :max_bubble_length,
    :bubble_node_count_limit,
    :max_coverage_at_fork,
  ].each do |opt|
    assembler.assembly_options[opt] = options[opt]
  end
  assembler.assembly_options[:sequences] = finishm_graph.velvet_sequences

  binding.pry if options[:debug]

  if options[:initial_node_shorthand]
    initial_trail = Bio::Velvet::Graph::OrientedNodeTrail.create_from_shorthand(options[:initial_node_shorthand], graph)
    log.info "Starting to assemble from specified initial node #{initial_trail.to_shorthand}.."
    path, _visited_nodes = assembler.assemble_from(initial_trail)

    name = options[:initial_node_shorthand]
    name += " #{path.to_shorthand}" if options[:output_pathspec]

    File.open(options[:output_trails_file],'w') do |output|
      output.puts ">#{name}"
      output.puts path.sequence
    end

    if options[:output_badformat_file]
      log.info "Writing badformat file to #{options[:output_badformat_file] }" if log.info?

      File.open(options[:output_badformat_file],'w') do |out|
        badformat = Bio::FinishM::BadFormatWriter.new
        badformat.add_metapath(name, path)
        badformat.write out
      end
    end
  else
    log.info "Attempting to assemble the entire graph"
    contig_count = 0
    stats_output = nil
    begin
      if options[:output_stats]
        stats_output = File.open(options[:output_stats],'w')
        # One header per column written for each contig below
        stats_output.puts %w(name length coverage).join("\t")
      end
      badformat_writer = Bio::FinishM::BadFormatWriter.new
      File.open(options[:output_trails_file],'w') do |output|
        progress_io = options[:progressbar] ? $stdout : nil
        assembler.assembly_options[:progressbar_io] = progress_io
        assembler.assemble do |path|
          contig_count += 1
          name = "contig#{contig_count}"
          output.print ">#{name}"
          if options[:output_pathspec]
            output.print " #{path.to_shorthand}"
          end
          output.puts
          output.puts path.sequence

          unless stats_output.nil?
            stats_output.puts [
              name,
              path.length_in_bp,
              path.coverage,
            ].join("\t")
          end

          badformat_writer.add_metapath(name, path) if options[:output_badformat_file]
        end
      end
      if options[:output_badformat_file]
        log.info "Writing badformat file to #{options[:output_badformat_file] }" if log.info?
        File.open(options[:output_badformat_file],'w') do |out|
          badformat_writer.write out
        end
      end
      log.info "Assembled #{contig_count} contigs"
    ensure
      # Close the stats file even when assembly raises part way through
      stats_output.close unless stats_output.nil?
    end
  end
end
|
224
|
+
end
|
@@ -0,0 +1,217 @@
|
|
1
|
+
class Bio::FinishM::Explorer
|
2
|
+
include Bio::FinishM::Logging
|
3
|
+
|
4
|
+
# One end of one contig that exploration starts from.
class InterestingPlace
  # Name of the contig
  attr_accessor :contig_name
  # Which end of the contig is meant
  attr_accessor :start_or_end

  # Formatted as "<contig_name>:<start_or_end>"
  def to_s
    "#{contig_name}:#{start_or_end}"
  end
end
|
11
|
+
|
12
|
+
# Register the command line options of 'finishm explore' on the given
# OptionParser, and seed the options hash with their defaults.
#
# optparse_object:: the OptionParser to add the banner, separators and switches to
# options:: Hash that is modified in place, first with the defaults and later
#           (when the parser is run) with the parsed values
#
# An unparseable --interesting-ends argument is logged and the process exits with status 1.
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm explore --contigs <contig_file> --interesting-ends <contig1:end,contig2:start,..> --fastq-gz <reads..> --output-explored-paths <output.fa>

Given a contig end, explore the assembly graph to determine what is out there. Does assembly
fail because of lack of coverage? Is there more sequence out there that has yet to be explored?
\n\n"

  options.merge!({
    :contig_end_length => 200,
    :graph_search_leash_length => 20000,
    # Given an explicit default so the --unscaffold-first help text can report it
    :unscaffold_first => false,
  })

  optparse_object.separator "\nRequired arguments:\n\n"
  optparse_object.on("--contigs FILE", "Fasta file containing contigs to find the fluff on [required]") do |arg|
    options[:contigs_file] = arg
  end
  optparse_object.on("--interesting-ends INTERESTING_PLACES", Array, "Comma-separated list of places to explore from e.g. 'contig1:end,MyContig2:start' to explore from the end of contig1 and the start of MyContig2. Names of contigs are as they are in the given --contigs file. Or use 'all' to mean all contig ends in the fasta file [required]") do |arg|
    if arg == ['all']
      options[:interesting_places] = :all
    else
      arg.each do |tuple|
        options[:interesting_places] ||= []
        splits = tuple.split(':')
        if splits.length != 2
          log.error "Unable to parse this --interesting-ends argument: #{tuple}"
          exit 1
        end
        place = InterestingPlace.new
        place.contig_name = splits[0]
        # %w builds an Array, so this is an exact match against 'start' / 'end'
        # (a plain %(...) literal is a String, and would accept any substring of "start end")
        if %w(start end).include?(splits[1])
          place.start_or_end = splits[1]
        else
          log.error "Unable to parse this --interesting-ends argument, second half must be 'start' or 'end': #{tuple}"
          exit 1
        end
        options[:interesting_places].push place
      end
    end
  end
  optparse_object.on("--output-explored-paths PATH", "Output found paths to this file in fasta format [required]") do |arg|
    options[:output_trails_file] = arg
  end
  optparse_object.separator "\nThere must be some definition of reads too:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on("--overhang NUM", Integer, "Start assembling this far from the ends of the contigs [default: #{options[:contig_end_length]}]") do |arg|
    options[:contig_end_length] = arg.to_i
  end
  optparse_object.on("--unscaffold-first", "Break the scaffolds in the contigs file apart, and then wander between the resultant contigs [default: #{options[:unscaffold_first]}]") do
    options[:unscaffold_first] = true
  end
  optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: #{options[:graph_search_leash_length]}]") do |arg|
    options[:graph_search_leash_length] = arg
  end

  Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
end
|
70
|
+
|
71
|
+
# Check the parsed options of 'finishm explore'.
#
# Returns a String describing the first problem found, or nil when nothing
# wrong was detected.
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return "Dangling argument(s) found e.g. #{argv[0]}" unless argv.length == 0

  required = [
    :contigs_file,
    :interesting_places,
    :output_trails_file,
  ]
  absent = required.find { |key| options[key].nil? }
  return "No option found to specify #{absent}." if absent

  # Need reads unless there is already an assembly
  return nil if options[:previous_assembly] or options[:previously_serialized_parsed_graph_file]

  read_error = Bio::FinishM::ReadInput.new.validate_options(options, [])
  return read_error unless read_error.nil?

  # The probe taken from each contig end must not be shorter than the assembly kmer
  overhang_too_short = options[:contig_end_length] < options[:velvet_kmer_size]
  overhang_too_short ? "The overhang must be greater than the size of the assembly kmer" : nil
end
|
101
|
+
|
102
|
+
# Explore the assembly graph outwards from the requested contig ends, and
# write every explored path, in fasta format, to options[:output_trails_file]
# ('-' meaning stdout).
#
# Two probe sequences are taken from each sufficiently long contig: the
# reverse complement of its first options[:contig_end_length] bases, then its
# last options[:contig_end_length] bases. The probe for the start of the i-th
# kept contig therefore has index 2*i, and the probe for its end 2*i+1.
#
# options:: Hash of parsed command line options
# argv:: not used by this method
#
# Exits the process with status 1 on duplicate contig names, or when a
# requested contig cannot be found.
def run(options, argv)
  # Read in all the contigs sequences, removing those that are too short
  probe_sequences = []
  sequence_names = []
  process_sequence = lambda do |name,seq|
    if seq.length < 2*options[:contig_end_length]
      log.warn "Not attempting to make connections from this contig, as it is overly short: #{name}"
      next
    end
    if sequence_names.include?(name)
      log.error "Found duplicate sequence names, being conservative and not continuuing #{name}"
      exit 1
    end
    sequence_names.push name

    sequence = seq.seq
    # Probe for the start of the contig, reverse complemented
    fwd2 = Bio::Sequence::NA.new(sequence[0...options[:contig_end_length]])
    probe_sequences.push fwd2.reverse_complement.to_s

    # Probe for the end of the contig
    probe_sequences.push sequence[(sequence.length-options[:contig_end_length])...sequence.length]
  end

  if options[:unscaffold_first]
    log.info "Unscaffolding scaffolds (before trying to connect them together again)"
    scaffolds = Bio::FinishM::ScaffoldBreaker.new.break_scaffolds options[:contigs_file]
    scaffolds.each do |scaffold|
      scaffold.contigs.each do |contig|
        process_sequence.call contig.name, contig.sequence
      end
    end
  else
    Bio::FlatFile.foreach(options[:contigs_file]) do |s|
      process_sequence.call s.definition, s.seq
    end
  end

  # Collect the node IDs that I'm interested in before generating the graph so don't have to do a whole assembly before getting an argument error
  interesting_probe_ids_to_place = {}
  if options[:interesting_places] == :all
    options[:interesting_places] = []
    base = 0
    sequence_names.each do |name|
      %w(start end).each do |side|
        place = InterestingPlace.new
        place.start_or_end = side
        place.contig_name = name
        options[:interesting_places].push place

        interesting_probe_ids_to_place[base] = place
        base += 1
      end
    end
  else
    options[:interesting_places].each do |place|
      seq_index = sequence_names.find_index place.contig_name
      if seq_index.nil?
        log.error "Unable to find interesting contig #{place.contig_name}, cannot continue"
        exit 1
      else
        # Start probes sit at even indices, end probes directly after them
        base = seq_index*2
        base += 1 if place.start_or_end == 'end'
        interesting_probe_ids_to_place[base] = place
      end
    end
  end

  # Generate the graph with the probe sequences in it.
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options
  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)

  # Explore from the interesting nodes
  output = nil
  if options[:output_trails_file] == '-'
    log.info "When trails are found, writing them to stdout"
    output = $stdout
  else
    log.info "When trails are found, writing them to #{options[:output_trails_file]}"
    output = File.open(options[:output_trails_file],'w')
  end

  begin
    explorer = Bio::AssemblyGraphAlgorithms::GraphExplorer.new
    interesting_probe_ids_to_place.each do |probe_id, place|
      log.info "Exploring from #{place}"
      if finishm_graph.probe_nodes[probe_id].nil?
        log.warn "Unable to find anchor node for #{place}, skipping exploration from there"
      else
        # Do exploration
        onode = finishm_graph.initial_path_from_probe probe_id
        paths = explorer.explore_from_node(finishm_graph.graph, onode, options[:graph_search_leash_length])
        max_length = paths.collect{|path| path.path.length_in_bp}.max
        log.info "Found #{paths.length} paths from #{place}, maximal length #{max_length}"

        # Print explorations that come back
        paths.each do |explore_path|
          begin
            # Fetch the sequence first, so no header is written for a path that cannot give one
            seq = explore_path.path.sequence
            output.puts ">#{place.contig_name}:#{place.start_or_end} #{explore_path.termination_type} nodes:#{explore_path}"
            output.puts seq
          rescue Bio::Velvet::Graph::OrientedNodeTrail::InsufficientLengthException
            log.warn "Unable to retrieve sequence from '#{place.contig_name}:#{place.start_or_end} #{explore_path.termination_type} nodes:#{explore_path}' due to insufficient length of path, ignoring"
          end
        end
      end
    end
  ensure
    # Close the output file even when exploration raises part way through
    output.close unless output == $stdout
  end
end
|
217
|
+
end
|