finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
class Bio::FinishM::Wanderer
|
|
2
|
+
include Bio::FinishM::Logging
|
|
3
|
+
|
|
4
|
+
DEFAULT_OPTIONS = {
|
|
5
|
+
:contig_end_length => 200,
|
|
6
|
+
:graph_search_leash_length => 20000,
|
|
7
|
+
:unscaffold_first => false,
|
|
8
|
+
:recoherence_kmer => 1,
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
# Register the command line interface of 'finishm wander'.
#
# * optparse_object: the option parser that the banner, separators and
#   switches are added to
# * options: hash that parsed values are written into. DEFAULT_OPTIONS is
#   merged into it first, so any pre-existing values for those keys are
#   replaced by the defaults.
#
# The options describing the reads and the graph generation are registered by
# delegating to Bio::FinishM::ReadInput and Bio::FinishM::GraphGenerator.
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm wander --contigs <contig_file> --fastq-gz <reads..> --output-connections <output.csv> --output-scaffolds <output.fasta>

Takes a set of contigs/scaffolds from a genome and finds connections in the graph between them. A connection here is given as
the length of the shortest path between them, without actually computing all the paths.

This can be used for scaffolding, because if a contig end only connects to one other contig end, then
those contigs might be scaffolded together.

This method can also be used for 'pre-scaffolding', in the following sense. If the shortest path between
two contig ends is 10kb, and a mate pair library with insert size 2kb suggests a linkage
between the two ends, then the mate pair linkage is likely false (as long as there is sufficient
coverage in the reads, and not overwhelming amounts of strain heterogeneity, etc.).

Example:

finishm wander --contigs contigs.fasta --fastq-gz reads.1.fq.gz,reads.2.fq.gz --output-scaffolds scaffolds.fasta

That will create a collapsed de-Bruijn graph from reads.1.fq.gz and reads.2.fq.gz, then try to find connections between
the starts and the ends of the contigs in contigs.fasta through the de-Bruijn graph. The new scaffolds are then
output to scaffolds.fasta

\n\n"

  options.merge!(DEFAULT_OPTIONS)

  optparse_object.separator "\nRequired arguments:\n\n"
  optparse_object.on("--contigs FILE", "fasta file of contigs/scaffolds to find connections between [required]") do |arg|
    options[:contigs_file] = arg
  end

  # validate_options accepts either one of the two outputs, so neither is
  # unconditionally required.
  optparse_object.separator "\nOutput modes:\n\n"
  optparse_object.on("--output-scaffolds FILE", "Output scaffolds in FASTA format [required unless --output-connections is specified]") do |arg|
    options[:output_scaffolds_file] = arg
  end
  optparse_object.on("--output-connections FILE", "Output connections in tab-separated format [required unless --output-scaffolds is specified]") do |arg|
    options[:output_connection_file] = arg
  end

  optparse_object.separator "\nThere must be some definition of reads too:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on("--overhang NUM", Integer, "Start assembling this far from the ends of the contigs [default: #{options[:contig_end_length]}]") do |arg|
    # arg has already been converted by the Integer coercion above
    options[:contig_end_length] = arg
  end
  optparse_object.on("--recoherence-kmer NUM", Integer, "Use a kmer longer than the original velvet one, to help remove bubbles and circular paths [default: none]") do |arg|
    options[:recoherence_kmer] = arg
  end
  optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: #{options[:graph_search_leash_length]}]") do |arg|
    options[:graph_search_leash_length] = arg
  end
  optparse_object.on("--unscaffold-first", "Break the scaffolds in the contigs file apart, and then wander between the resultant contigs. [default: #{options[:unscaffold_first]}]") do
    options[:unscaffold_first] = true
  end
  # :proceed_on_short_contigs has no entry in DEFAULT_OPTIONS, so coerce the
  # (usually nil) value to a boolean rather than printing an empty default.
  optparse_object.on("--proceed-on-short-contigs", "By default, when overly short contigs are encountered, finishm croaks. This option stops the croaking [default: #{options[:proceed_on_short_contigs] ? true : false}]") do
    options[:proceed_on_short_contigs] = true
  end

  Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
end
|
|
72
|
+
|
|
73
|
+
# Check that the parsed options describe a runnable 'wander' invocation.
#
# * options: the hash filled in by the option parser
# * argv: whatever command line arguments were left over after parsing
#
# Returns a String describing the first problem found. When none of the checks
# here fail, returns the result of Bio::FinishM::ReadInput#validate_options
# for the same options.
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return "Dangling argument(s) found e.g. #{argv[0]}" unless argv.length == 0

  # Options that must always be given
  mandatory = [:contigs_file]
  absent = mandatory.find { |sym| options[sym].nil? }
  return "No option found to specify #{absent}." unless absent.nil?

  # At least one kind of output has to be requested
  output_paths = [options[:output_scaffolds_file], options[:output_connection_file]]
  if output_paths.all? { |path| path.nil? }
    return "Need to specify either output scaffolds or output connections file"
  end

  # Everything specific to wander is fine; hand over to the reads validation
  Bio::FinishM::ReadInput.new.validate_options(options, [])
end
|
|
95
|
+
|
|
96
|
+
# Run the wander: read the contigs, build a probed assembly graph from the
# reads, find connections between contig ends, then write the requested
# scaffold and/or connection files.
#
# Options read directly here: :contig_end_length, :unscaffold_first,
# :contigs_file, :proceed_on_short_contigs, :recoherence_kmer (for logging),
# :output_scaffolds_file and :output_connection_file. The whole options hash
# is also passed on to ReadInput#parse_options, GraphGenerator#generate_graph
# and probed_graph_to_connections. argv is accepted but not used.
#
# Raises a RuntimeError if any contig is shorter than 2 * :contig_end_length
# and :proceed_on_short_contigs is not set.
def run(options, argv=[])
  # Read in all the contigs sequences, removing those that are too short
  probe_sequences = []
  contig_sequences = []
  contig_names = []
  overly_short_sequence_count = 0
  process_sequence = lambda do |name, seq|
    if seq.length < 2*options[:contig_end_length]
      log.warn "Not attempting to make connections from this contig, as it is overly short: #{name}"
      overly_short_sequence_count += 1
      nil
    else
      contig_sequences.push seq.to_s
      contig_names.push name

      sequence = seq.seq
      # Probe for the start of the contig: the first bases, reverse complemented
      start_probe = Bio::Sequence::NA.new(sequence[0...options[:contig_end_length]])
      probe_sequences.push start_probe.reverse_complement.to_s

      # Probe for the end of the contig: the last bases, as they are
      probe_sequences.push sequence[(sequence.length-options[:contig_end_length])...sequence.length]

      # 'return' the probe indices that have been assigned
      [probe_sequences.length-2, probe_sequences.length-1]
    end
  end

  if options[:unscaffold_first]
    log.info "Unscaffolding scaffolds (before trying to connect them together again)"
    # Array of Bio::FinishM::ScaffoldBreaker::Scaffold objects.
    input_scaffolds = Bio::FinishM::ScaffoldBreaker.new.break_scaffolds options[:contigs_file]
    input_scaffolds.each do |scaffold|
      scaffold.contigs.each do |contig|
        process_sequence.call contig.name, contig.sequence
      end
    end
  else
    # Else don't split up any of the sequences
    log.info "Reading input sequences.."
    Bio::FlatFile.foreach(options[:contigs_file]) do |seq|
      process_sequence.call seq.definition, seq.seq
    end
  end

  if overly_short_sequence_count > 0 && !options[:proceed_on_short_contigs]
    raise "Not proceding as some contigs are too short (length < 2 * overhang). You might try: "+
      "(1) omitting the smaller contigs, (2) reducing the --overhang parameter, or "+
      "(3) using --proceed-on-short-contigs to continue optimistically ignoring the #{overly_short_sequence_count} short contigs"
  end

  log.info "Searching from #{probe_sequences.length} different contig ends (#{probe_sequences.length / 2} contigs)"

  # Generate the graph with the probe sequences in it.
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options
  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)

  log.info "Finding possible connections with recoherence kmer of #{options[:recoherence_kmer]}"
  all_connections = probed_graph_to_connections(finishm_graph, options)
  log.debug "Finished actual wandering, found #{all_connections.length} connections" if log.debug?

  # Determine scaffolding connections
  interpreter = Bio::FinishM::ConnectionInterpreter.new(all_connections, (0...contig_sequences.length))
  connections = interpreter.doubly_single_contig_connections
  log.debug "Found #{connections.length} connections between contigs that can be used for scaffolding" if log.debug?
  scaffolds = interpreter.scaffolds(connections)

  # Gather some stats
  circular_scaffold_names = []
  num_contigs_in_circular_scaffolds = 0
  num_singleton_contigs = 0
  num_scaffolded_contigs = 0
  scaffolds.each_with_index do |scaffold, i|
    if scaffold.circular?
      # Use the same name as is written to the scaffolds file below
      circular_scaffold_names.push "scaffold#{i+1}"
      num_contigs_in_circular_scaffolds += scaffold.contigs.length
    elsif scaffold.contigs.length == 1
      num_singleton_contigs += 1
    else
      num_scaffolded_contigs += scaffold.contigs.length
    end
  end
  log.info "Found #{circular_scaffold_names.length} circular scaffolds encompassing #{num_contigs_in_circular_scaffolds} contigs"
  log.info "#{num_scaffolded_contigs} contigs were incorporated into scaffolds"
  log.info "#{num_singleton_contigs} contigs were not incorporated into any scaffolds"

  unless options[:output_scaffolds_file].nil?
    File.open(options[:output_scaffolds_file],'w') do |scaffold_file|
      scaffolds.each_with_index do |scaffold, i|
        name = "scaffold#{i+1}"
        name += ' circular' if scaffold.circular?

        scaffold_file.puts ">#{name}"
        # Output the NA sequence wrapped at 80 characters per line
        seq = scaffold.sequence(contig_sequences)
        scaffold_file.puts seq.gsub(/(.{80})/,"\\1\n").gsub(/\n$/,'')
      end
    end
  end

  # Write out all connections to the given file if wanted
  unless options[:output_connection_file].nil?
    File.open(options[:output_connection_file], 'w') do |out|
      all_connections.each do |conn|
        out.puts [
          "#{contig_names[conn.probe1.sequence_index]}:#{conn.probe1.side}",
          "#{contig_names[conn.probe2.sequence_index]}:#{conn.probe2.side}",
          conn.distance
        ].join("\t")
      end
    end
  end

  log.info "All done."
end
|
|
220
|
+
|
|
221
|
+
# Given a probed graph, wander between all the probe nodes and return an Array
# of Bio::FinishM::ConnectionInterpreter::Connection objects, one for each
# pair of probes found to be connected within the leash length. Probes are
# taken to alternate start, end, start, end.. so that probes 2n and 2n+1
# belong to sequence n. Required options:
# * :graph_search_leash_length
# * :recoherence_kmer
def probed_graph_to_connections(finishm_graph, options)
  leash_length = options[:graph_search_leash_length]

  # Try to make connections from each of the ends
  wanderer = Bio::AssemblyGraphAlgorithms::SingleCoherentWanderer.new
  raw_connections = wanderer.wander(
    finishm_graph,
    leash_length,
    options[:recoherence_kmer],
    finishm_graph.velvet_sequences,
    options
  )
  log.debug "Initially found #{raw_connections.length} connections with less distance than the leash length" if log.debug?

  # Describe each probe by the side and index of the sequence it came from
  probes = Array.new(finishm_graph.probe_nodes.length) do |probe_index|
    probe = Bio::FinishM::ConnectionInterpreter::Probe.new
    probe.side = probe_index.even? ? :start : :end
    probe.sequence_index = probe_index / 2
    probe
  end

  # Gather connections ready for output
  calibrator = Bio::AssemblyGraphAlgorithms::AcyclicConnectionFinder.new
  kept = []
  raw_connections.each do |probe_pair, raw_distance|
    true_distance = calibrator.calibrate_distance_accounting_for_probes(
      finishm_graph, probe_pair[0], probe_pair[1], raw_distance
    )

    connection = Bio::FinishM::ConnectionInterpreter::Connection.new
    connection.probe1 = probes[probe_pair[0]]
    connection.probe2 = probes[probe_pair[1]]
    connection.distance = true_distance

    # After calibration a connection can turn out to be just longer than the
    # leash length; those are discarded.
    if true_distance <= leash_length
      kept.push connection
    elsif log.debug?
      log.debug "Disregarding connection #{connection} because it was ultimately outside the allowable leash length"
    end
  end
  kept
end
|
|
270
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# A pattern of presence/absence/neither across a run of kmers.
#
# Elements are true (present), false (absent) or a "no-man's land" marker.
# parse_from_kmer_abundance stores the marker as the string '-', while
# parse_from_human stores it as nil; binary_string accepts both.
class KmerAbundancePattern < Array
  # Return the pattern as a String with one character per element: '1' for
  # true, '0' for false and '-' for no-man's land ('-' or nil).
  #
  # Raises a RuntimeError if any other kind of element is found.
  def binary_string
    map do |atom|
      case atom
      when true then '1'
      when false then '0'
      when '-', nil then '-'
      else
        raise "Unexpected pattern atom found: #{atom}"
      end
    end.join
  end

  # Parse a 100001011 type representation, replacing any pattern that was
  # previously held. '1' becomes true, '0' false and '-' nil.
  #
  # Raises a RuntimeError on any other character.
  def parse_from_human(boolean_pattern)
    clear #remove the last pattern if it existed
    boolean_pattern.each_char do |char|
      case char
      when '1' then push true
      when '0' then push false
      when '-' then push nil
      else
        raise "Unexpected pattern character: #{char}"
      end
    end
  end

  # Return true if this pattern is exactly the same
  # as another pattern
  #
  # e.g. 101 is same_as? 101 but not 111 or 110
  #
  # Raises a RuntimeError if the two patterns differ in length.
  def same_as?(another_pattern)
    unless length == another_pattern.length
      raise "Unexpected comparison of this pattern #{inspect} with another: #{another_pattern.inspect}"
    end
    each_with_index.all? { |atom, i| atom == another_pattern[i] }
  end

  # Return true if another_pattern shows presence in all places
  # where this pattern is present, (but maybe more)
  #
  # e.g. 101 is consistent with 101 and 111, but not 011
  #
  # Raises a RuntimeError if the two patterns differ in length, or if a
  # no-man's land component of this pattern is reached before an
  # inconsistency has been found.
  def consistent_with?(another_pattern)
    unless length == another_pattern.length
      raise "Unexpected comparison of this pattern #{inspect} with another: #{another_pattern.inspect}"
    end
    each_with_index do |bool, i|
      unless [true, false].include?(bool)
        raise "Cannot test consistency of a pattern that is not entirely present/absent: #{inspect}"
      end
      return false if bool && another_pattern[i] == false
    end
    true
  end

  # Append one element per abundance: true when the abundance is at least
  # upper_limit, false when it is at most lower_limit, and '-' otherwise.
  # Unlike parse_from_human, elements already in the pattern are kept.
  def parse_from_kmer_abundance(abundances, lower_limit, upper_limit)
    abundances.each do |a|
      if a >= upper_limit
        push true
      elsif a <= lower_limit
        push false
      else
        push '-'
      end
    end
  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
require 'csv'
|
|
2
|
+
|
|
3
|
+
module Bio
  # A class to work with a kmer abundance file format,
  # where the kmer is first, then each abundance after that (no headings, space separated).
  #
  # Keys are upper-cased kmers. Values are the abundance columns as they were
  # read, so after parse_from_file they are Strings, not numbers.
  class KmerMultipleAbundanceHash < Hash
    include Bio::FinishM::Logging

    # Read the file at path into a new instance.
    #
    # Blank lines are skipped. Raises a RuntimeError if the kmers are not all
    # the same length, or if the lines do not all have the same number of
    # abundances.
    def self.parse_from_file(path)
      obj = self.new
      kmer_length = nil
      num_abundances = nil
      CSV.foreach(path, :col_sep => ' ', :skip_blanks => true) do |row|
        kmer = row[0].upcase
        abundances = row[1...row.length]

        kmer_length ||= kmer.length
        if kmer.length != kmer_length
          raise "inconsistent length of kmer found in kmer abundance file, in line: #{row.inspect}"
        end
        num_abundances ||= abundances.length
        if num_abundances != abundances.length
          raise "inconsistent number of abundances found in kmer abundance file, in line: #{row.inspect}"
        end
        obj[kmer] = abundances
      end
      return obj
    end

    # Length of the kmers held, judged from the first one. Returns nil when
    # no kmers are held.
    def kmer_length
      each_key { |kmer| return kmer.length }
      nil
    end

    # Number of abundances held per kmer, judged from the first kmer. Returns
    # nil when no kmers are held.
    def number_of_abundances
      each_value { |abundances| return abundances.length }
      nil
    end

    # Return the abundances of the given kmer (case insensitive). A kmer that
    # is not held gets an array of zeroes, one per abundance column; this is
    # an empty array when no kmers are held at all.
    def [](kmer)
      abundances = super(kmer.upcase)
      abundances ||= [0] * (number_of_abundances || 0)
      return abundances
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
class OligoDesigner
|
|
2
|
+
# Given a sequence, find the subsequence that starts at the 5' end (ie the
|
|
3
|
+
# start of the string), and ends when the melting temperature is maximal but
|
|
4
|
+
# below the max_temperature requires the oligotm program to be available on the
|
|
5
|
+
# cmd line.
|
|
6
|
+
#
|
|
7
|
+
# * nucleotide_string: the full sequence that we are choosing oligos from
|
|
8
|
+
# * max_temperature: the maximal temperature to start things off at
|
|
9
|
+
# * gc_clamp: require this many G or C residues at the 3' end of the oligo.
|
|
10
|
+
def just_below(nucleotide_string, max_temperature, gc_clamp=0)
|
|
11
|
+
# initial conditions
|
|
12
|
+
guess = 0
|
|
13
|
+
guess_temp = 0
|
|
14
|
+
|
|
15
|
+
# loop around
|
|
16
|
+
while guess_temp < max_temperature
|
|
17
|
+
guess += 1
|
|
18
|
+
guess_temp = melting_temperature nucleotide_string[0..guess-1]
|
|
19
|
+
|
|
20
|
+
# break if there's we've reached the end of the line
|
|
21
|
+
return nucleotide_string if guess > nucleotide_string.length
|
|
22
|
+
end
|
|
23
|
+
return nucleotide_string[0..guess-2]
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Rank oligomers within some constraints.
|
|
27
|
+
def possible_oligos_ordered_by_temperature_difference(nucleotide_string, min_temperature, best_temperature, max_temperature, gc_clamp)
|
|
28
|
+
default_distance = lambda do |seq, tm|
|
|
29
|
+
# fails constraints if not enough GC clamp
|
|
30
|
+
if seq[seq.length-gc_clamp..seq.length-1].gsub(/[gc]/i,'').length > 0
|
|
31
|
+
false
|
|
32
|
+
else
|
|
33
|
+
# the sequence is within contraints. The melting temperature closest to the best wins.
|
|
34
|
+
tm_diff = (best_temperature-tm).abs
|
|
35
|
+
tm_diff
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# initial conditions
|
|
40
|
+
guess = 0
|
|
41
|
+
guess_temp = 0
|
|
42
|
+
# arrays to fill with possible possibles
|
|
43
|
+
oligos = []
|
|
44
|
+
|
|
45
|
+
# loop around, until max temperature is reached
|
|
46
|
+
while guess_temp < max_temperature
|
|
47
|
+
guess += 1
|
|
48
|
+
seq = nucleotide_string[0..guess-1]
|
|
49
|
+
guess_temp = melting_temperature seq
|
|
50
|
+
|
|
51
|
+
# Add it to the list if there is enough temperature
|
|
52
|
+
if guess_temp > min_temperature and guess_temp < max_temperature
|
|
53
|
+
o = Oligo.new
|
|
54
|
+
o.sequence = seq
|
|
55
|
+
o.tm = guess_temp
|
|
56
|
+
oligos.push o
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# break if there's we've reached the end of the line
|
|
60
|
+
break if guess > nucleotide_string.length-1
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Convert sequences into distances
|
|
64
|
+
oligos.each do |oligo|
|
|
65
|
+
oligo.distance = default_distance.call(oligo.sequence, oligo.tm)
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# remove sequences that don't meet the constraints, and sort the rest with
|
|
69
|
+
# smallest distance first
|
|
70
|
+
return oligos.reject{|o| o.distance == false}.sort{|a,b|
|
|
71
|
+
a.distance<=>b.distance
|
|
72
|
+
}.collect{|o| o.sequence}
|
|
73
|
+
end
|
|
74
|
+
alias_method :order, :possible_oligos_ordered_by_temperature_difference
|
|
75
|
+
|
|
76
|
+
# A simple method to return the melting temperature of a particular nucleotide
|
|
77
|
+
# string. Uses oligotm on the command line.
|
|
78
|
+
def melting_temperature(nucleotide_string)
|
|
79
|
+
#`oligotm -tp 1 -sc 1 -n 0.8 -d 500 -mv 0 -dv 50 '#{nucleotide_string}'`.to_f
|
|
80
|
+
`oligotm -tp 1 -sc 1 -n 0.2 -d 2 -mv 1 '#{nucleotide_string}'`.to_f
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
class Oligo
|
|
86
|
+
attr_accessor :sequence, :tm, :distance
|
|
87
|
+
end
|
|
88
|
+
end
|