finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
class Bio::FinishM::Sequence
|
|
2
|
+
include Bio::FinishM::Logging
|
|
3
|
+
|
|
4
|
+
# One step along a user-specified path through the graph: the ID of a node
# (node_id) together with a marker for which side of that node is visited
# first (first_side). Both are plain read/write attributes; the
# parse_path_string lambda in add_options fills first_side with
# Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST or END_IS_FIRST.
class PathSteppingStone
|
|
5
|
+
attr_accessor :node_id, :first_side
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
# Register the options understood by `finishm sequence` on optparse_object.
#
# optparse_object:: option parser; banner=, separator and on are called on it
# options:: Hash that the option handlers write parsed values into
#
# Handlers set options[:path_ids] (the raw --path-ids string) or
# options[:paths] (an Array of paths, each an Array of PathSteppingStone).
# A RuntimeError is raised from a handler when a path element cannot be
# parsed, or when --paths is given while options[:paths] is already set.
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm sequence --assembly-??? --path PATH\n" \
    "\n" \
    "Given a series of nodes and orientations, print the DNA sequence of the given path\n" \
    "\n\n"

  # Parse an Array like ['4s', '2s', '3e'] into a programmatic version of a path
  parse_path_string = lambda do |path_string|
    path_string.collect do |str|
      matches = str.match(/^([01-9]+)([se])$/)
      if matches.nil?
        # Report the offending element and the whole path it came from
        raise "Unable to parse stepping stone along the path: `#{str}'. Entire path was `#{path_string.join(',')}'."
      end

      stone = PathSteppingStone.new
      stone.node_id = matches[1].to_i
      # The regex only admits 's' or 'e' as the orientation character
      stone.first_side = if matches[2] == 's'
        Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
      else
        Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
      end
      stone
    end
  end

  optparse_object.separator "\nOne of the following path defining arguments must be defined:\n\n"
  optparse_object.on("--path-ids PATH", "A comma separated list of node IDs - the program attempts to determine the orientations automatically") do |arg|
    options[:path_ids] = arg
  end
  optparse_object.on("--path PATH", Array, "A comma separated list of node IDs and orientations - explore from these probe IDs in the graph e.g. '4s,2s,3e' means start at the start of node 4, connecting to the beginning of node 2 and finally the end of probe 3.") do |arg|
    options[:paths] = [parse_path_string.call(arg)]
  end
  optparse_object.on("--paths PATHS", "A colon separated list of comma separated lists of node IDs and orientations - e.g. '4s,2s,3e:532s,465s' means print 2 different paths") do |arg|
    raise "Only one of --path and --paths can be specified" unless options[:paths].nil?
    options[:paths] = []
    arg.split(':').each do |split|
      split.strip!
      next if split == ''
      options[:paths].push parse_path_string.call(split.split(','))
    end
    log.info "Read in #{options[:paths].length} path definitions"
  end

  optparse_object.separator "\nIf an assembly is to be done, there must be some definition of reads:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional graph-related arguments:\n\n"
  Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
end
|
|
64
|
+
|
|
65
|
+
# Check the parsed options for `finishm sequence`.
#
# Returns a String describing the first problem found, or nil when the
# options are acceptable. When neither options[:previous_assembly] nor
# options[:previously_serialized_parsed_graph_file] is set, the result of
# Bio::FinishM::ReadInput#validate_options is returned instead.
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return "Dangling argument(s) found e.g. #{argv[0] }" unless argv.length == 0

  path_definitions = options[:paths]
  if options[:path_ids]
    # --path-ids and --path/--paths are mutually exclusive
    return "Multiple ways to define the path given, one at a time please" if path_definitions
  elsif path_definitions.nil? or path_definitions.empty?
    return "No path defined, so don't know how to procede through the graph"
  end

  # Need reads unless there is already an assembly
  already_assembled = options[:previous_assembly] || options[:previously_serialized_parsed_graph_file]
  return nil if already_assembled

  Bio::FinishM::ReadInput.new.validate_options(options, [])
end
|
|
90
|
+
|
|
91
|
+
# Read in or generate the assembly graph, then print each requested path as
# a FASTA-style record (a '>' header built from the trail's shorthand,
# followed by the trail's sequence) on standard output.
#
# With options[:path_ids] the trail is built by
# OrientedNodeTrail.create_from_super_shorthand; otherwise each path in
# options[:paths] is walked stone by stone. A RuntimeError is raised when a
# node ID is not in the graph or when consecutive stones are not neighbours.
def run(options, argv)
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options

  # Generate the assembly graph
  log.info "Reading in or generating the assembly graph"
  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)

  emit_record = lambda do |oriented_trail|
    print '>'
    puts oriented_trail.to_shorthand
    puts oriented_trail.sequence
  end

  if options[:path_ids]
    shorthand_trail = Bio::Velvet::Graph::OrientedNodeTrail.create_from_super_shorthand(options[:path_ids], finishm_graph.graph)
    emit_record.call shorthand_trail
  else
    # Build the oriented node trail
    log.info "Building the trail(s) from the nodes"
    options[:paths].each do |path|
      trail = Bio::Velvet::Graph::OrientedNodeTrail.new
      path.each do |stone|
        log.debug "Adding stone to the trail: #{stone.inspect}"
        node = finishm_graph.graph.nodes[stone.node_id]
        raise "Unable to find node ID #{stone.node_id} in the graph, so cannot continue" if node.nil?

        # check that the path actually connects in the graph, otherwise stop.
        # The first stepping stone has nothing to connect to, so it is exempt.
        if trail.length != 0
          connected = false
          trail.neighbours_of_last_node(finishm_graph.graph).each do |candidate|
            log.debug "Considering neighbour #{candidate.inspect}"
            connected = true if candidate.node == node && candidate.first_side == stone.first_side
          end
          raise "In the graph, the node #{trail.last.to_s} does not connect with #{stone.inspect}" unless connected
        end

        # OK, all the checking done. Actually add it to the trail
        trail.add_node node, stone.first_side
      end

      # Print the sequence
      emit_record.call trail
    end
  end
end
|
|
142
|
+
end
|
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
class Bio::FinishM::Visualise
|
|
2
|
+
include Bio::FinishM::Logging
|
|
3
|
+
|
|
4
|
+
DEFAULT_OPTIONS = {
|
|
5
|
+
:min_adjoining_reads => 2,
|
|
6
|
+
:max_adjoining_node_coverage => 300,
|
|
7
|
+
:graph_search_leash_length => 20000,
|
|
8
|
+
:interesting_probes => nil,
|
|
9
|
+
:max_nodes => 50,
|
|
10
|
+
:contig_end_length => 200
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
# Register every option understood by `finishm visualise` on
# optparse_object. DEFAULT_OPTIONS is merged into options first, then the
# output-format, read-input, scaffold, probe and graph-generation options
# are added in that order.
def add_options(optparse_object, options)
  options.merge!(DEFAULT_OPTIONS)
  optparse_object.banner = "\nUsage: finishm visualise --assembly-??? <output_visualisation_file>\n" \
    "\n" \
    "Visualise an assembly graph\n" \
    "\n\n"

  # Output formats
  optparse_object.separator("Output visualisation formats (one or more of these must be used)")
  add_visualisation_options(optparse_object, options)

  # Reads / genome input
  optparse_object.separator("Input genome information")
  optparse_object.separator("\nIf an assembly is to be done, there must be some definition of reads:\n\n") #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  # Which part of the graph to explore
  optparse_object.separator("\nOptional graph-exploration arguments:\n\n")
  add_scaffold_options(optparse_object, options)
  add_probe_options(optparse_object, options)

  # Graph generation
  optparse_object.separator("\nOptional graph-related arguments:\n\n")
  Bio::FinishM::GraphGenerator.new.add_options(optparse_object, options)
end
|
|
34
|
+
|
|
35
|
+
# Run each validator in turn and return the first error message (a String),
# or nil when every validator passes.
#
# validate_scaffold_options is part of the chain so that the documented
# "--genomes is required if --scaffolds is given" rule is actually enforced.
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return validate_argv_length(argv) ||
    validate_visualisation_options(options) ||
    validate_scaffold_options(options) ||
    validate_probe_options(options) ||
    validate_assembly_options(options)
end
|
|
43
|
+
|
|
44
|
+
# Register the --assembly-svg, --assembly-png and --assembly-dot options.
# Each stores the given path under options[:output_graph_<format>].
def add_visualisation_options(optparse_object, options)
  output_formats = [
    ['svg', :output_graph_svg],
    ['png', :output_graph_png],
    ['dot', :output_graph_dot],
  ]
  registrations = output_formats.map do |format, option_key|
    optparse_object.on("--assembly-#{format} PATH", "Output assembly as a #{format.upcase} file [default: off]") do |path|
      options[option_key] = path
    end
  end
  # Same return value as a plain sequence of `on` calls: the last one's result
  registrations.last
end
|
|
55
|
+
|
|
56
|
+
# Returns an error String when none of the PNG, SVG or DOT output options
# has been given, otherwise nil.
def validate_visualisation_options(options)
  output_keys = [:output_graph_png, :output_graph_svg, :output_graph_dot]
  return nil unless output_keys.all? { |key| options[key].nil? }

  "No visualisation output format/file given, don't know how to visualise"
end
|
|
61
|
+
|
|
62
|
+
# Register --genomes, --scaffolds and --overhang.
#
# --genomes stores the list of fasta files in options[:assembly_files].
# --scaffolds stores a flat list of scaffold sides in
# options[:scaffold_sides]; an entry not ending in 's' or 'e' is expanded
# to both its start ('<name>s') and end ('<name>e').
# --overhang stores an Integer in options[:contig_end_length].
def add_scaffold_options(optparse_object, options)
  optparse_object.on("--genomes FASTA_1[,FASTA_2...]", Array, "Fasta files of genomes used in the assembly. Required if --scaffolds is given [default: unused]") do |fasta_files|
    options[:assembly_files] = fasta_files
  end

  optparse_object.on("--scaffolds SIDE_1[,SIDE_2...]", Array, "explore from these scaffold ends e.g 'contig1s' for the start of contig1, 'contig1e' for the end of contig1, and 'contig1,contig3e' for both sides of contig1 and the end of contig3 [default: unused]") do |sides|
    options[:scaffold_sides] = sides.flat_map do |side|
      side.match(/[se]$/) ? side : ["#{side}s","#{side}e"]
    end
  end

  optparse_object.on("--overhang NUM", Integer, "Start assembling this far from the ends of the contigs [default: #{options[:contig_end_length]}]") do |distance|
    options[:contig_end_length] = distance.to_i
  end
end
|
|
79
|
+
|
|
80
|
+
# If scaffolds are defined, then probe genomes must also be defined.
#
# The --scaffolds option handler stores its value under
# options[:scaffold_sides]; options[:scaffolds] is also honoured in case a
# caller sets that key directly.
#
# Returns an error String when scaffolds were requested without
# options[:assembly_files], otherwise nil.
def validate_scaffold_options(options)
  scaffolds_requested = options[:scaffold_sides] || options[:scaffolds]
  if scaffolds_requested and !options[:assembly_files]
    return "If --scaffolds is defined, so then must --genomes"
  end
  nil
end
|
|
86
|
+
|
|
87
|
+
# Register the probe / node selection options.
#
# --probe-ids and --probe-ids-file store an Array of Integers in
# options[:interesting_probes]; --probe-names-file stores an Array of
# Strings in options[:interesting_probe_names]; --node-ids stores an Array
# of Integers in options[:interesting_nodes]. The remaining options store
# into :probe_to_node_map, :graph_search_leash_length and :max_nodes
# (--max-nodes 0 is stored as nil, meaning no maximum).
#
# The three probe-defining options are mutually exclusive regardless of the
# order they appear in. IDs must be written as plain positive integers;
# anything else raises a RuntimeError from the option handler.
def add_probe_options(optparse_object, options)
  # Returns the Integer for a strictly formatted positive ID, else nil
  parse_positive_id = lambda do |text|
    id = text.to_i
    (id.to_s == text && id >= 1) ? id : nil
  end

  # Remember which probe-defining flag was used first, so that a second,
  # different one is rejected whichever order they are given in
  probe_source = nil
  claim_probe_source = lambda do |flag|
    if probe_source && probe_source != flag
      raise "Cannot specify any two of --probe-names-file, --probe-ids and --probe-ids-file sorry"
    end
    probe_source = flag
  end

  optparse_object.on("--probe-ids PROBE_IDS", Array, "explore from these probe IDs in the graph (comma separated). probe ID is the ID in the velvet Sequence file. See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
    claim_probe_source.call('--probe-ids')
    options[:interesting_probes] = arg.collect do |read|
      read_id = parse_positive_id.call(read)
      raise "Unable to parse probe ID #{read}, from #{arg}, cannot continue" if read_id.nil?
      read_id
    end
  end
  optparse_object.on("--probe-ids-file PROBE_IDS_FILE", String, "explore from the probe IDs given in the file (1 probe ID per line). See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
    raise "Cannot specify both --probe-ids and --probe-ids-file sorry" if options[:interesting_probes]
    claim_probe_source.call('--probe-ids-file')
    options[:interesting_probes] = []
    log.info "Reading probe IDs from file: `#{arg}'"
    File.foreach(arg) do |line|
      line.strip!
      next if line == ''
      read_id = parse_positive_id.call(line)
      raise "Unable to parse probe ID #{line}, from file #{arg}, cannot continue" if read_id.nil?
      options[:interesting_probes].push read_id
    end
    log.info "Read #{options[:interesting_probes].length} probes in"
  end
  optparse_object.on("--probe-names-file PROBE_NAMES_FILE", String, "explore from the probe names (i.e. the first word in the fasta/fastq header) given in the file (1 probe name per line). See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
    raise "Cannot specify any two of --probe-names-file, --probe-ids and --probe-ids-file sorry" if options[:interesting_probes]
    claim_probe_source.call('--probe-names-file')
    options[:interesting_probe_names] = []
    log.info "Reading probe names from file: `#{arg}'"
    File.foreach(arg) do |line|
      line.strip!
      next if line == ''
      # Only the first whitespace-delimited word is the probe name
      options[:interesting_probe_names].push line.split(/\s/)[0]
    end
    log.info "Read #{options[:interesting_probe_names].length} probes names in"
  end
  optparse_object.on("--probe-to-node-map FILE", String, "Output a tab separated file containing the read IDs and their respective node IDs [default: no output]") do |arg|
    options[:probe_to_node_map] = arg
  end
  optparse_object.on("--node-ids NODE_IDS", Array, "explore from these nodes in the graph (comma separated). Node IDs are the nodes in the velvet graph. See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg|
    options[:interesting_nodes] = arg.collect do |read|
      node_id = parse_positive_id.call(read)
      raise "Unable to parse node ID #{read}, from #{arg}, cannot continue" if node_id.nil?
      node_id
    end
  end
  optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: unused unless --probe-ids or --node-ids is specified, otherwise #{options[:graph_search_leash_length] }]") do |arg|
    options[:graph_search_leash_length] = arg
  end
  optparse_object.on("--max-nodes NUM", Integer, "Maximum number of nodes to explore out from each probe node, or 0 for no maximum [default: #{options[:max_nodes] }]") do |arg|
    options[:max_nodes] = (arg == 0 ? nil : arg)
  end
end
|
|
146
|
+
|
|
147
|
+
# Probes (given by ID or by name) and nodes cannot both be used as starting
# points: run takes the probe branch first, so node IDs given alongside
# probes would otherwise be silently ignored.
#
# Returns an error String when both kinds are present, otherwise nil.
def validate_probe_options(options)
  probes_given = options[:interesting_probes] || options[:interesting_probe_names]
  if probes_given and options[:interesting_nodes]
    return "Can only be interested in probes or nodes, not both, at least currently"
  end
  nil
end
|
|
152
|
+
|
|
153
|
+
# Need reads unless there is already an assembly.
#
# Returns nil when options[:previous_assembly] or
# options[:previously_serialized_parsed_graph_file] is set; otherwise
# returns whatever Bio::FinishM::ReadInput#validate_options returns.
def validate_assembly_options(options)
  return nil if options[:previous_assembly] || options[:previously_serialized_parsed_graph_file]

  Bio::FinishM::ReadInput.new.validate_options(options, [])
end
|
|
159
|
+
|
|
160
|
+
# Returns an error String naming the first leftover argument when argv is
# not empty, otherwise nil.
def validate_argv_length(argv)
  return nil if argv.length == 0

  "Dangling argument(s) found e.g. #{argv[0] }"
end
|
|
165
|
+
|
|
166
|
+
# Read in or generate the assembly graph, work out which nodes to draw, and
# write the requested visualisation file(s).
#
# The graph is built from, in order of precedence: probes
# (options[:interesting_probes] or options[:interesting_probe_names]),
# nodes (options[:interesting_nodes]), genome fasta files
# (options[:assembly_files]), or nothing (the entire graph). The options
# hash is updated in place with the keys create_graphviz_output reads.
def run(options, argv)
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options

  # Generate the assembly graph
  log.info "Reading in or generating the assembly graph"

  interesting_node_ids = nil
  if options[:interesting_probes] or options[:interesting_probe_names]
    finishm_graph, interesting_node_ids = generate_graph_from_probes(read_input, options)

    if options[:probe_to_node_map]
      # Probes may have been given by ID or by name; label the map with
      # whichever list was actually supplied rather than passing nil
      probe_labels = options[:interesting_probes] || options[:interesting_probe_names]
      write_probe_to_node_map(options[:probe_to_node_map], finishm_graph, probe_labels)
    end
  elsif options[:interesting_nodes]
    # generate_graph_from_nodes returns the graph AND the node IDs, so it
    # must be destructured; assigning it whole would store an Array
    finishm_graph, _node_ids = generate_graph_from_nodes(read_input, options)
    interesting_node_ids = options[:interesting_nodes]
  elsif options[:assembly_files]
    finishm_graph, interesting_node_ids, node_id_to_nickname = generate_graph_from_assembly(read_input, options)
    options[:node_id_to_nickname] = node_id_to_nickname
  else
    # Visualising the entire graph
    finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)
  end

  if options[:graph_search_leash_length] and interesting_node_ids
    nodes_within_leash, node_ids_at_leash = get_nodes_within_leash(finishm_graph, interesting_node_ids, options)
    log.info "Found #{node_ids_at_leash.length} nodes at the end of the #{options[:graph_search_leash_length] }bp leash"

    options.merge!({
      :start_node_ids => interesting_node_ids,
      :nodes => nodes_within_leash,
      :end_node_ids => node_ids_at_leash,
    })
  else
    options[:nodes] = finishm_graph.graph.nodes
  end

  # Determine paired-end connections
  log.info "Determining paired-end node connections.."
  paired_end_links = find_paired_end_linkages(finishm_graph, options[:nodes])
  options[:paired_nodes_hash] = paired_end_links

  create_graphviz_output(finishm_graph, options)
end
|
|
213
|
+
|
|
214
|
+
# Convert the graph to a graphviz object via Bio::Assembly::ABVisualiser and
# write it out in each format (PNG, SVG, DOT) for which an output path is
# present in options. The :neato layout program is requested for each.
def create_graphviz_output(finishm_graph, options)
  log.info "Converting assembly to a graphviz.."
  visualiser_settings = {}
  [:start_node_ids, :nodes, :end_node_ids, :paired_nodes_hash, :node_id_to_nickname].each do |key|
    visualiser_settings[key] = options[key]
  end
  gv = Bio::Assembly::ABVisualiser.new.graphviz(finishm_graph.graph, visualiser_settings)

  # Convert gv object to something actually pictorial
  outputs = [
    [:png, 'PNG', :output_graph_png],
    [:svg, 'SVG', :output_graph_svg],
    [:dot, 'DOT', :output_graph_dot],
  ]
  written = outputs.map do |format, label, option_key|
    next unless options[option_key]
    log.info "Writing #{label} #{options[option_key] }"
    gv.output format => options[option_key], :use => :neato
  end
  # Value of the final (DOT) step, as with a plain sequence of ifs
  written.last
end
|
|
238
|
+
|
|
239
|
+
# Generate the graph when the user has asked to start from probes.
#
# Probe names (options[:interesting_probe_names]) take precedence over
# probe IDs (options[:interesting_probes]); the chosen list is copied to
# options[:probe_read_names] or options[:probe_reads] respectively before
# the graph is generated.
#
# Returns [finishm_graph, interesting_node_ids], where the node IDs are
# those of the non-nil entries of finishm_graph.probe_nodes.
def generate_graph_from_probes(read_input, options)
  # Looking based on probes
  if options[:interesting_probe_names]
    source_key = :interesting_probe_names
    target_key = :probe_read_names
  else
    source_key = :interesting_probes
    target_key = :probe_reads
  end

  targets = options[source_key]
  # Only list the first five targets when there are many
  listing = if targets.length > 5
    "#{targets[0..4].join(', ') }, ..."
  else
    targets.inspect
  end
  log.info "Targeting #{targets.length} probes #{listing}"
  options[target_key] = targets

  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)
  found_probe_nodes = finishm_graph.probe_nodes.reject { |probe_node| probe_node.nil? }
  interesting_node_ids = found_probe_nodes.collect { |probe_node| probe_node.node_id }

  return finishm_graph, interesting_node_ids
end
|
|
262
|
+
|
|
263
|
+
# Generate the graph when the user has asked to start from specific nodes.
#
# Returns [finishm_graph, interesting_node_ids], where the node IDs are
# options[:interesting_nodes] unchanged.
def generate_graph_from_nodes(read_input, options)
  # Looking based on nodes
  node_ids = options[:interesting_nodes]
  summary = if node_ids.length > 5
    "Targeting #{node_ids.length} nodes #{node_ids[0..4].join(', ') }, ..."
  else
    "Targeting #{node_ids.length} node(s) #{node_ids.inspect}"
  end
  log.info summary

  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)

  return finishm_graph, options[:interesting_nodes]
end
|
|
276
|
+
|
|
277
|
+
# Generate the graph using the ends of the scaffolds in the genome fasta
# files (options[:assembly_files]) as probes.
#
# When options[:scaffold_sides] is given only those scaffold ends are
# treated as interesting (a RuntimeError is raised for a side not found in
# the genomes); otherwise every scaffold end is.
#
# Returns [finishm_graph, interesting_node_ids, node_id_to_nickname].
# Probes with no entry in finishm_graph.probe_nodes (nil) are skipped
# rather than crashing: they contribute no node ID and no nickname.
def generate_graph_from_assembly(read_input, options)
  # Parse the genome fasta file in
  genomes = Bio::FinishM::InputGenome.parse_genome_fasta_files(
    options[:assembly_files],
    options[:contig_end_length],
    options
    )

  # Create hash of contig end name to probe index
  contig_name_to_probe = {}
  genomes.each do |genome|
    genome.scaffolds.each_with_index do |_scaffold, scaffold_index|
      end_probes = [
        genome.first_probe(scaffold_index),
        genome.last_probe(scaffold_index)
      ]
      end_probes.each do |probe|
        suffix = case probe.side
        when :start then 's'
        when :end then 'e'
        else raise "Programming error"
        end
        key = "#{probe.contig.scaffold.name}#{suffix}"

        if contig_name_to_probe.key?(key)
          # NOTE(review): the message says "quitting" but execution carries
          # on and the later probe overwrites the earlier one - confirm
          # which behaviour is intended before changing it.
          log.error "Encountered multiple contigs with the same name, this might cause problems, so quitting #{key}"
        end
        contig_name_to_probe[key] = probe.index
      end
    end
  end

  # Gather a list of probe indexes that are of interest to the user
  if options[:scaffold_sides]
    # If looking at specified ends
    interesting_probe_ids = options[:scaffold_sides].collect do |side|
      probe = contig_name_to_probe[side]
      raise "Unable to find scaffold side in given genome: #{side}" if probe.nil?
      probe
    end
    log.info "Found #{interesting_probe_ids.length} scaffold sides in the assembly of interest"
  else
    # else looking at all the contig ends in all the genomes
    interesting_probe_ids = contig_name_to_probe.values
    log.info "Visualising all #{interesting_probe_ids.length} contig ends in all genomes"
  end

  # Generate the graph
  probe_sequences = genomes.collect{|genome| genome.probe_sequences}.flatten
  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)

  # Convert probe IDs into node IDs, skipping probes that have no node
  interesting_node_ids = interesting_probe_ids.collect do |pid|
    probe_node = finishm_graph.probe_nodes[pid]
    if probe_node.nil?
      log.warn "Probe #{pid} was not found in the graph, so it cannot be used as a starting point"
      nil
    else
      probe_node.node_id
    end
  end.compact.uniq

  # create a nickname hash, id of node to name. Include all nodes even if they weren't specified directly (they only get visualised if they are within leash length of another)
  node_id_to_nickname = {}
  contig_name_to_probe.each do |name, probe|
    probe_node = finishm_graph.probe_nodes[probe]
    next if probe_node.nil?
    key = probe_node.node_id
    if node_id_to_nickname.key?(key)
      # Several scaffold ends can sit on the same node; list them all
      node_id_to_nickname[key] = "#{node_id_to_nickname[key]} #{name}"
    else
      node_id_to_nickname[key] = name
    end
  end

  return finishm_graph, interesting_node_ids, node_id_to_nickname
end
|
|
351
|
+
|
|
352
|
+
# Find the nodes reachable from node_ids by a Dijkstra search limited by
# options[:graph_search_leash_length] and options[:max_nodes], using a
# PairedEndNeighbourFinder (kept in @finder) as the neighbour finder.
#
# Returns [nodes_within_leash, node_ids_at_leash]: the unique nodes found,
# and the IDs of found nodes that have at least one neighbour outside the
# found set.
def get_nodes_within_leash(finishm_graph, node_ids, options={})
  log.info "Finding nodes within the leash length of #{options[:graph_search_leash_length] } with maximum node count #{options[:max_nodes] }.."
  dijkstra = Bio::AssemblyGraphAlgorithms::Dijkstra.new

  @finder = Bio::FinishM::PairedEndNeighbourFinder.new(finishm_graph, 500) #TODO: this hard-coded 500 isn't great here
  @finder.min_adjoining_reads = options[:min_adjoining_reads]
  @finder.max_adjoining_node_coverage = options[:max_adjoining_node_coverage]

  start_nodes = node_ids.collect { |node_id| finishm_graph.graph.nodes[node_id] }
  search_options = {
    :ignore_directions => true,
    :leash_length => options[:graph_search_leash_length],
    :max_nodes => options[:max_nodes],
    :neighbour_finder => @finder
  }
  nodes_within_leash_hash = dijkstra.min_distances_from_many_nodes_in_both_directions(
    finishm_graph.graph, start_nodes, search_options
  )
  # The first element of each key is used as a node ID
  nodes_within_leash = nodes_within_leash_hash.keys.collect { |found| finishm_graph.graph.nodes[found[0]] }
  log.info "Found #{nodes_within_leash.collect{|o| o.node_id}.uniq.length} node(s) within the leash length"

  # These nodes are at the end of the leash - a node is in here iff
  # it has a neighbour that is not in the nodes_within_leash
  node_ids_at_leash = Set.new
  nodes_within_leash_hash.keys.each do |found|
    node = finishm_graph.graph.nodes[found[0]]
    onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node, found[1]
    # it only takes one outside neighbour to be listed
    has_outside_neighbour = onode.next_neighbours(finishm_graph.graph).any? do |oneigh|
      !nodes_within_leash_hash.key?(oneigh.to_settable)
    end
    node_ids_at_leash << found[0] if has_outside_neighbour
  end

  return nodes_within_leash.uniq, node_ids_at_leash.to_a
end
|
|
387
|
+
|
|
388
|
+
# Build a Hash of node ID => Array of node IDs that @finder reports as
# neighbours of that node, looking from both orientations of the node.
# IDs are de-duplicated within each orientation, not across them.
#
# Returns an empty Hash when @finder has not been set.
def find_paired_end_linkages(finishm_graph, node_array)
  return {} if @finder.nil?

  orientations = [
    Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST,
    Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
  ]
  node_array.each_with_object({}) do |node, links|
    links[node.node_id] = orientations.flat_map do |orientation|
      oriented = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(node, orientation)
      @finder.neighbours(oriented).collect { |neighbour| neighbour.node.node_id }.uniq
    end
  end
end
|
|
404
|
+
|
|
405
|
+
# Write to a file probe_to_node_map_file a map that shows the
# probe ID, which node that probe is on, and the name of the probe
#
# probe_to_node_map_file:: path of the tab-separated file to (over)write
# finishm_graph:: object whose probe_nodes and probe_node_directions are read
# names:: Array of probe labels, indexed in step with probe_nodes; may be
#         nil, in which case the label column is left empty
#
# One row is written per probe. A probe with no node gets '-' in the node
# and direction columns.
def write_probe_to_node_map(probe_to_node_map_file, finishm_graph, names)
  log.info "Writing probe-to-node map to #{probe_to_node_map_file}.."
  File.open(probe_to_node_map_file,'w') do |f|
    f.puts %w(probe_number probe node direction).join("\t")
    finishm_graph.probe_nodes.each_with_index do |node, i|
      probe_label = names.nil? ? nil : names[i]
      if node.nil?
        node_column = '-'
        direction_column = '-'
      else
        node_column = node.node_id
        direction_column = finishm_graph.probe_node_directions[i] == true ? 'forward' : 'reverse'
      end
      # Probe numbers are reported 1-based
      f.puts [i+1, probe_label, node_column, direction_column].join("\t")
    end
  end
end
|
|
430
|
+
end
|