finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
require 'optparse'
|
|
2
|
+
require 'tempfile'
|
|
3
|
+
|
|
4
|
+
require 'bio-logger'
|
|
5
|
+
require 'bio'
|
|
6
|
+
require 'progressbar'
|
|
7
|
+
require 'bio-ipcress'
|
|
8
|
+
$:.unshift File.join(ENV['HOME'],'git','bioruby-primer3','lib')
|
|
9
|
+
require 'bio-primer3'
|
|
10
|
+
|
|
11
|
+
class Bio::FinishM::Primers::Checker
|
|
12
|
+
include Bio::FinishM::Logging
|
|
13
|
+
|
|
14
|
+
# Register the command line interface of `finishm primers_check` on the
# given parser and seed the options hash with this command's defaults.
#
# @param optparse_object [OptionParser] parser that receives the banner,
#   separators and switches
# @param options [Hash] mutated in place: defaults are merged in here, and
#   the switch callbacks store parsed values in it
# @raise [ArgumentError] (at parse time) when --optimum-melting-temperature
#   is not greater than 0
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm primers_check --primers <primers_file> [options]

    Check that each pair of primers in the primers file is compatible.
    \n\n"

  options.merge!({
    :logger => 'stderr',
    :logger_trace_level => 'info',
    :melting_temperature_optimum => 56,
    :melting_temperature_tolerance => 2,
    :extra_global_primer3_options => {
      'PRIMER_MAX_POLY_X' => 4,
      'PRIMER_EXPLAIN_FLAG' => '1',
    },
    :persevere => false,
  })

  optparse_object.separator "Required arguments:\n\n"
  optparse_object.on("-p", "--primers PRIMERS_FILE", String, "A file of primers, newline separated [required]") do |arg|
    options[:primers_file] = arg
  end

  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on("--contig-universe FASTA_FILE", String, "All contigs in the mixture [default: unspecified (don't test this)]") do |arg|
    options[:contig_universe] = arg
  end
  optparse_object.on("--optimum-melting-temperature TEMPERATURE", Integer, "Primers aim for this melting temperature [default: default in primer3 (currently #{options[:melting_temperature_optimum]}C)]") do |arg|
    options[:melting_temperature_optimum] = arg.to_i
    # Raise a StandardError subclass (never bare Exception) and name the
    # offending switch so the message makes sense on its own.
    unless options[:melting_temperature_optimum] > 0
      raise ArgumentError, "--optimum-melting-temperature has to be greater than 0, found #{arg}"
    end
  end
  optparse_object.on("--primer3-options OPTION_LIST", "Give extra instructions to Primer3 [default <none>]. Acceptable values can be found in the primer3 manual e.g. 'PRIMER_MAX_POLY_X=4;PRIMER_MAX_SIZE=22' will specify those 2 parameters to primer3. Argument names are auto-capitalised so 'primer_max_poly_X=4;primer_max_size=22'is equivalent.") do |arg|
    # Supplying this switch replaces the default extra primer3 options
    # entirely rather than adding to them.
    options[:extra_global_primer3_options] = {}
    arg.split(';').each do |a2|
      splits = a2.split('=')
      unless splits.length == 2
        raise "Unexpected format of the --primer3-options flag, specifically couldn't parse this part: '#{a2}'"
      end
      options[:extra_global_primer3_options][splits[0].upcase]=splits[1]
    end
  end
end
|
|
56
|
+
|
|
57
|
+
# Check the parsed command line for `finishm primers_check`.
#
# @param options [Hash] parsed options; :primers_file must be present
# @param argv [Array] arguments left over after option parsing
# @return [String, nil] a description of the first problem found, or nil
#   when there are no leftover arguments and nothing required is missing
def validate_options(options, argv)
  # Anything left over after parsing is an error
  return "Dangling argument(s) found e.g. #{argv[0] }" unless argv.length == 0

  required = [:primers_file]
  missing = required.find { |sym| options[sym].nil? }
  return "No option found to specify #{missing}." unless missing.nil?

  nil
end
|
|
71
|
+
|
|
72
|
+
# Entry point of `finishm primers_check`: read the primers file, test every
# pair of primers against each other with primer3 and, when a contig
# universe is given, test that no pair amplifies anything in it.
#
# Exits the process with status 1 when an incompatible primer pair is found.
#
# @param options [Hash] reads :primers_file, :extra_global_primer3_options,
#   :melting_temperature_optimum, :melting_temperature_tolerance and
#   (optionally) :contig_universe. The hash is not modified.
# @param argv [Array] unused
# @raise [RuntimeError] when a line of the primers file contains characters
#   other than A/T/G/C, or fewer than 2 primers are found
def run(options, argv)
  Bio::Log::CLI.configure('bio-primer3')
  found_error = false

  # Read the primers in, one per line; blank lines are ignored
  primers = []
  File.foreach(options[:primers_file]) do |line|
    line.strip!
    next if line.empty?
    unless line.match(/^[atgc]+$/i)
      raise "Malformed primer sequence '#{line}' found in the file - I'm just after one primer per line, that's all."
    end
    primers.push line
  end
  log.info "Read in #{primers.length} primers e.g. #{primers[0] }"
  raise "Need at least 2 primers!" unless primers.length >= 2

  # Build the primer3 settings on a copy (non-destructive merge) so the
  # caller's options hash is left as it was handed in.
  primer3_options = options[:extra_global_primer3_options].merge({
    'PRIMER_OPT_TM' => options[:melting_temperature_optimum],
    'PRIMER_MIN_TM' => options[:melting_temperature_optimum]-options[:melting_temperature_tolerance],
    'PRIMER_MAX_TM' => options[:melting_temperature_optimum]+options[:melting_temperature_tolerance],
  })

  # Check to make sure there is no incompatibilities primer vs primer:
  num_compared = check_primer_compatibilities(primers, primer3_options)
  if num_compared == false
    log.error "Found at least one incompatible primer set, giving up."
    exit 1
  end
  log.info "Validated #{num_compared} different pairs of primers, they don't seem to conflict with each other at all, according to primer3's check primers thing"

  # Check the contig universe
  if options[:contig_universe]
    log.info "in-silico PCR: testing the contig universe. This could probably be sped up by only making a single call to ipcress, but oh well."
    # Expected to be the number of primer pairs with at least one match
    num_universe_hits = check_contig_universe(primers, options[:contig_universe])
    if num_universe_hits > 0
      # Per-pair details are not available here, so only the number of
      # offending pairs is summarised.
      log.warn "Found #{num_universe_hits} pair(s) of primers with matches in the contig universe, expected none."
      found_error = true
    else
      log.info "No sequence appears to be amplified with any two primers in the background set of contigs (this is a good thing)."
    end
  else
    log.info "Not checking to see if primers match any other contigs not targeted are picked up by these primers, because no universe of contigs was specified."
  end

  if found_error
    log.warn "At least one problem detected in the primer checking process"
  else
    log.warn "No primer issues detected"
  end
end
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Each pair of primers should be compatible
|
|
130
|
+
# Each pair of primers should be compatible.
#
# Runs Bio::Primer3.test_primer_compatibility on every 2-combination of the
# primers, stopping at the first pair reported as incompatible.
#
# @param primers [#to_a] primer sequences
# @param primer3_options [Hash] handed to primer3 unchanged
# @return [Integer, false] number of pairs tested when none conflicted,
#   false as soon as an incompatible pair is found
def check_primer_compatibilities(primers, primer3_options)
  # First just make sure that everything is ok here
  log.info "Double checking to make sure there is no incompatibilities between primer pairs"
  pairs_tested = 0

  primers.to_a.combination(2) do |primer_a, primer_b|
    compatible, result = Bio::Primer3.test_primer_compatibility(primer_a, primer_b, primer3_options, :return_result => true)
    pairs_tested += 1
    log.debug "Primer3 returned: #{result.inspect}" if log.debug?

    # Only an explicit false counts as a failure
    next unless compatible == false

    log.warn "Found an incompatibility between #{primer_a} and #{primer_b}"
    log.warn "Primer3 output was: #{result.inspect}"
    return false
  end

  pairs_tested
end
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# Does any primer pairs amplify anything in a large set of background (universe) contigs?
|
|
155
|
+
# They shouldn't.
|
|
156
|
+
# Does any primer pair amplify anything in a large set of background
# (universe) contigs? They shouldn't.
#
# Runs ipcress once per 2-combination of primers against the universe and
# logs a warning for each pair that has at least one match.
#
# @param primers [#to_a] primer sequences
# @param contig_universe [String] path to the FASTA file of background contigs
# @param ipcress_options [Hash] passed through as the third argument of
#   Bio::Ipcress.run; defaults to an empty hash
# @return [Integer] number of primer pairs with at least one match
def check_contig_universe(primers, contig_universe, ipcress_options = {})
  num_hits = 0
  num_compared = 0

  primers.to_a.combination(2) do |primer1, primer2|
    num_compared += 1

    primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
    # Search the universe passed in as a parameter; there is no `options`
    # hash in scope in this method.
    results = Bio::Ipcress.run primer_set, contig_universe, ipcress_options

    if results.length > 0
      num_hits += 1
      log.warn "Found #{results.length} matches between #{primer1} and #{primer2} in the contig universe, expected none."
    end
  end

  log.debug "Tested #{num_compared} pairs of primers to see if anything else was hit, warnings above if any did so"
  # Return the count explicitly; the caller compares it against 0
  num_hits
end
|
|
176
|
+
end
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
class Bio::FinishM::RoundUp
|
|
2
|
+
include Bio::FinishM::Logging
|
|
3
|
+
|
|
4
|
+
# Defaults merged into the options hash by add_options.
DEFAULT_OPTIONS = {
  contig_end_length: 200,
  graph_search_leash_length: 20_000,
  unscaffold_first: false,
  recoherence_kmer: 1,
  debug: false,
  gapfill_only: false,
  max_explore_nodes: 10_000,
  max_gapfill_paths: 10,
}
|
|
14
|
+
|
|
15
|
+
# Register the command line interface of `finishm roundup` on the given
# parser and seed the options hash with DEFAULT_OPTIONS.
#
# Read-input switches and graph-generation switches are added by delegating
# to Bio::FinishM::ReadInput and Bio::FinishM::GraphGenerator respectively.
#
# @param optparse_object [OptionParser] parser that receives the banner,
#   separators and switches
# @param options [Hash] mutated in place: defaults are merged in here, and
#   the switch callbacks store parsed values in it
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm roundup --genomes <genome1.fasta>[,<genome2.fasta>,...] --fastq-gz <reads..> --output-directory <directory>

    Takes one or more genomes and tries to improve their quality by reducing the number of
    scaffolds and N characters they contain.

    Example:

      finishm roundup --genomes genome1.fasta,genome2.fasta --fastq-gz reads.1.fq.gz,reads.2.fq.gz --output-directory finishm_roundup_results

    That will create a collapsed de-Bruijn graph from reads.1.fq.gz and reads.2.fq.gz, then try to find connections between
    the starts and the ends of the contigs in genome1.fasta. If any connections between contigs are mutually exclusive,
    then they are incorporated into scaffolds together, and gapfilling is attempted. The final sequences are output in
    the finishm_roundup_results directory in FASTA format. The procedure is then repeated for genome2.fasta.

    \n\n"

  options.merge!(DEFAULT_OPTIONS)

  optparse_object.separator "\nRequired arguments:\n\n"
  optparse_object.on("--genomes FASTA_1[,FASTA_2...]", Array, "fasta files of genomes to be improved [required]") do |arg|
    options[:assembly_files] = arg
  end
  optparse_object.on("--output-directory PATH", "Output results to this directory [required]") do |arg|
    options[:output_directory] = arg
  end

  optparse_object.separator "\nThere must be some definition of reads too:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on("--overhang NUM", Integer, "Start assembling this far from the ends of the contigs [default: #{options[:contig_end_length] }]") do |arg|
    options[:contig_end_length] = arg.to_i
  end
  optparse_object.on("--recoherence-kmer NUM", Integer, "Use a kmer longer than the original velvet one, to help remove bubbles and circular paths [default: none]") do |arg|
    options[:recoherence_kmer] = arg
  end
  optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: #{options[:graph_search_leash_length] }]") do |arg|
    options[:graph_search_leash_length] = arg
  end
  optparse_object.on("--unscaffold-first", "Break the scaffolds in the contigs file apart, and then wander between the resultant contigs. This option is only relevant to the wander step; gapfilling is attempted on all gaps by default. [default: #{options[:unscaffold_first] }]") do
    options[:unscaffold_first] = true
  end
  optparse_object.on("--gapfill-only", "Don't wander, just gapfill [default: #{options[:gapfill_only] }]") do
    options[:gapfill_only] = true
  end
  optparse_object.on("--max-gapfill-paths NUM", Integer, "When this number of paths is exceeded, don't gapfill, print as Ns [default: #{options[:max_gapfill_paths] }]") do |arg|
    options[:max_gapfill_paths] = arg
  end
  optparse_object.on("--max-explore-nodes NUM", Integer, "Only explore this many nodes. If max is reached, do not make connections. [default: #{options[:max_explore_nodes] }]") do |arg|
    options[:max_explore_nodes] = arg
  end
  optparse_object.on("--debug", "Build the graph, then drop to a pry console. [default: #{options[:debug] }]") do
    options[:debug] = true
  end
  optparse_object.on("--probe NUM",Integer,"debug mode: explore from this probe number (1-based index), gapfill only, no wander. [default: explore from all probes]") do |arg|
    options[:interesting_probes] = [arg-1] #convert to 0-based index
    options[:gapfill_only] = true
  end
  #optparse_object.on("--proceed-on-short-contigs", "By default, when overly short contigs are encountered, finishm croaks. This option stops the croaking [default: #{options[:proceed_on_short_contigs] }]") do
  #  options[:proceed_on_short_contigs] = true
  #end

  Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
end
|
|
80
|
+
|
|
81
|
+
# Check the parsed command line for `finishm roundup`.
#
# @param options [Hash] parsed options; :assembly_files and
#   :output_directory must be present
# @param argv [Array] arguments left over after option parsing
# @return [String, nil] a description of the first problem found; otherwise
#   whatever Bio::FinishM::ReadInput#validate_options returns for these options
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return "Dangling argument(s) found e.g. #{argv[0] }" if argv.length != 0

  required = [:assembly_files, :output_directory]
  missing = required.find { |sym| options[sym].nil? }
  return "No option found to specify #{missing}." if missing

  #if return nil from here, options all were parsed successfully
  Bio::FinishM::ReadInput.new.validate_options(options, [])
end
|
|
100
|
+
|
|
101
|
+
# Entry point of `finishm roundup`.
#
# Builds one graph containing the probes of every input genome, then for
# each genome: optionally wanders between scaffold ends, gapfills within and
# between the scaffolds, and writes four files into the output directory,
# each named after the genome's file name plus a suffix:
#   .report.txt, .connections.csv, .scaffolds.fasta and
#   .at_least_half_completely_wrong.vcf
#
# @param options [Hash] reads :output_directory, :assembly_files,
#   :contig_end_length, :debug and :gapfill_only directly, and is passed on
#   to the collaborating objects
# @param argv [Array] unused
def run(options, argv=[])
  # Make sure output directory is writeable to avoid late croaking
  output_directory = setup_output_directory options[:output_directory]

  # Gather the probes from each genome supplied
  genomes = Bio::FinishM::InputGenome.parse_genome_fasta_files(
    options[:assembly_files],
    options[:contig_end_length],
    options
  )

  # Generate one velvet assembly to rule them all (forging the assembly is hard work..)
  probe_sequences = genomes.collect{|genome| genome.probe_sequences}.flatten
  # Generate the graph with the probe sequences in it.
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options
  master_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)

  binding.pry if options[:debug]

  # For each genome, wander, gapfill, then output
  wanderer = Bio::FinishM::Wanderer.new
  gapfiller = Bio::FinishM::GapFiller.new
  printer = Bio::AssemblyGraphAlgorithms::ContigPrinter.new
  genomes.each do |genome|
    # wander using just the probes on the ends of the scaffolds
    connected_scaffolds = nil
    all_connections = []
    gaps_filled_in_genome = 0
    # All output files for this genome share this path prefix
    output_prefix = File.join(output_directory, File.basename(genome.filename))

    File.open(output_prefix+".report.txt",'w') do |report|
      report.puts "#{Time.now} FinishM report for roundup run with: #{options.inspect}"

      if options[:gapfill_only]
        log.info "Skipping wander, gapfilling only"
        connected_scaffolds = Bio::FinishM::ConnectionInterpreter.new([], (0...genome.scaffolds.length)).scaffolds([])
      else
        log.debug "Wandering.."
        # The third return value (the probe indices wandered) is not needed here
        connected_scaffolds, all_connections, _probe_indices = wander_a_genome(wanderer, genome, master_graph, options, report)
      end

      # Write out all the connections
      File.open(output_prefix+".connections.csv",'w') do |con_file|
        all_connections.each do |connection|
          con_file.puts connection
        end
      end

      output_path = output_prefix+".scaffolds.fasta"
      variants_path = output_prefix+".at_least_half_completely_wrong.vcf"
      num_circular_scaffolds = 0

      File.open(output_path, 'w') do |output_file|
        File.open(variants_path,'w') do |variants_file|
          variants_file.puts %w(#CHROM POS ID REF ALT QUAL FILTER INFO).join("\t")
          # gapfill between
          # (1) interpreted_connections
          # (2) gaps that were present before above wander
          connected_scaffolds.each_with_index do |cross_scaffold_connection, connected_scaffold_index|
            superscaffold_name = "scaffold#{connected_scaffold_index+1}"

            pretend_contig = cross_scaffold_connection.contigs[0]
            first_scaffold_index = pretend_contig.sequence_index

            # Gapfill contigs within the scaffold on the extreme LHS.
            # gapfill_a_scaffold is also handed variants_file, so the
            # variants it returns are not used again here.
            scaffold_sequence, num_gaps, _variants = gapfill_a_scaffold(gapfiller, printer, master_graph, genome, first_scaffold_index, pretend_contig.direction, superscaffold_name, report, variants_file, options)
            gaps_filled_in_genome += num_gaps

            last_contig = nil
            cross_scaffold_connection.contigs.each do |contig|
              unless last_contig.nil? #skip the first contig - it be done
                last_name = genome.scaffolds[last_contig.sequence_index].name
                current_name = genome.scaffolds[contig.sequence_index].name
                log.debug "Connecting #{last_name} and #{current_name}" if log.debug?

                # Ready the contig on the RHS of this join
                # Gapfill within the scaffold on the RHS of the new gap
                rhs_sequence, num_gaps, _variants = gapfill_a_scaffold(gapfiller, printer, master_graph, genome, contig.sequence_index, contig.direction, superscaffold_name, report, variants_file, options)
                gaps_filled_in_genome += num_gaps

                # Gapfill across the new gap between scaffolds
                aconn = gapfiller.gapfill(master_graph,
                  last_contig.direction == true ? genome.last_probe(last_contig.sequence_index).index : genome.first_probe(last_contig.sequence_index).index,
                  contig.direction == true ? genome.first_probe(contig.sequence_index).index : genome.last_probe(contig.sequence_index).index,
                  options
                )
                log.debug "Found #{aconn.paths.length} connections between #{last_name} and #{current_name}" if log.debug?
                if aconn.paths.length == 0
                  # when this occurs, it is due to there being a circuit in the path, so no paths are printed.
                  # (at least for now) TODO: this could be improved.
                  # Just arbitrarily put in 100 N characters, to denote a join, but no gapfill
                  scaffold_sequence = scaffold_sequence+('N'*100)+rhs_sequence
                else
                  scaffold_sequence, variants = printer.ready_two_contigs_and_connections(
                    master_graph.graph,
                    scaffold_sequence,
                    aconn,
                    rhs_sequence,
                    master_graph.velvet_sequences
                  )
                  # Print variants
                  # TODO: need to change coordinates of variants, particularly when >2 contigs are joined?
                  variants.each do |variant|
                    variant.reference_name = superscaffold_name
                    variants_file.puts variant.vcf(scaffold_sequence)
                  end
                end
              end
              last_contig = contig
            end

            #Output the scaffold to the output directory
            descriptor = nil
            if cross_scaffold_connection.circular?
              descriptor = 'circular'
              num_circular_scaffolds += 1
            else
              descriptor = 'scaffold'
            end
            scaffold_names = cross_scaffold_connection.contigs.collect do |contig|
              genome.scaffolds[contig.sequence_index].name
            end
            output_file.puts ">#{superscaffold_name} #{descriptor} #{scaffold_names.join(':') }"
            output_file.puts scaffold_sequence
          end

          num_connected_scaffolds = genome.scaffolds.length - connected_scaffolds.length
          log.info "Wrote #{connected_scaffolds.length} scaffolds to #{output_path}, after scaffolding #{num_connected_scaffolds} scaffolds together (#{num_circular_scaffolds} were circular) and filling #{gaps_filled_in_genome} gaps."
        end
      end
    end
  end
end
|
|
236
|
+
|
|
237
|
+
# Resolve the output directory to an absolute path and make sure it can be
# used: an existing path must be a writable directory, a missing one is
# created with Dir.mkdir.
#
# Logs an error and exits the process with status 1 when the path exists
# but is not a directory, or is not writable.
#
# @param given_directory [String] path as supplied by the user
# @return [String] absolute path of the output directory
def setup_output_directory(given_directory)
  target = File.absolute_path(given_directory)
  log.debug "Using output directory: #{target}" if log.debug?

  unless File.exist?(target)
    # Creating a new output directory
    Dir.mkdir(target)
    return target
  end

  if !File.directory?(target)
    log.error "Specified --output-directory #{target} exists but is a file and not a directory. Cannot continue."
    exit 1
  elsif !File.writable?(target)
    log.error "Specified --output-directory #{target} is not writeable. Cannot continue."
    exit 1
  else
    log.debug "Already existing output directory #{target} seems usable"
  end

  target
end
|
|
258
|
+
|
|
259
|
+
# Wander between the scaffold ends of one genome and interpret the
# connections found, noting a summary in the report.
#
# @param wanderer [#probed_graph_to_connections]
# @param genome genome providing each_scaffold_end_numbered_probe and scaffolds
# @param master_probed_graph [#subgraph] graph holding the probes of all genomes
# @param options [Hash] handed to the wanderer unchanged
# @param report [#puts] receives the human readable summary
# @return [Array] three values: the scaffolds built from the usable
#   connections, every connection found, and the probe numbers used
def wander_a_genome(wanderer, genome, master_probed_graph, options, report)
  # Create new finishm_graph with only probes from the ends of the scaffolds of this genome
  end_probe_numbers = []
  genome.each_scaffold_end_numbered_probe do |probe|
    end_probe_numbers << probe.number
  end
  genome_graph = master_probed_graph.subgraph(end_probe_numbers)

  scaffold_count = genome.scaffolds.length

  found_connections = wanderer.probed_graph_to_connections(genome_graph, options)

  interpreter = Bio::FinishM::ConnectionInterpreter.new(found_connections, (0...scaffold_count))
  usable = interpreter.doubly_single_contig_connections
  report.puts "Found #{usable.length} connections between contigs that can be used for scaffolding"

  loose_ends = interpreter.unconnected_probes
  report.puts "Found #{loose_ends.length} contig ends that did not connect to any others"
  loose_ends.each { |probe| report.puts "Did not connect to any other probes: #{probe.inspect}" }

  return interpreter.scaffolds(usable), found_connections, end_probe_numbers
end
|
|
280
|
+
|
|
281
|
+
# Gapfill between the contigs of one scaffold and stitch the contigs back
# together into a single sequence.
#
# Gaps for which no usable fill is found, and gaps skipped because
# options[:interesting_probes] excludes both of their probes, are rendered
# as runs of N of the gap's length. Each filled gap is noted in the report
# and the log, and each variant found is written to variants_file.
#
# @param gapfiller [#gapfill]
# @param printer passed on to piece_together_gapfill
# @param master_graph passed on to the gapfiller and piece_together_gapfill
# @param genome genome providing each_gap_probe_pair, scaffolds, gap_length
#   and filename
# @param scaffold_index [Integer] index of the scaffold within genome.scaffolds
# @param scaffold_direction [Boolean] false to reverse complement the result
# @param superscaffold_name [String] reference name written on each variant
# @param report [#puts] receives one line per gap filled
# @param variants_file [#puts] receives one VCF line per variant
# @param options [Hash] reads :interesting_probes and :max_gapfill_paths, and
#   is handed to the gapfiller
# @return [Array] three values: the stitched sequence, the number of gaps
#   filled, and the variants found
def gapfill_a_scaffold(gapfiller, printer, master_graph, genome, scaffold_index, scaffold_direction, superscaffold_name, report, variants_file, options)
  interesting = options[:interesting_probes]

  # One entry per gap, in the order the gaps are yielded, so that entry i
  # lines up with contigs i and i+1 below (assumes each_gap_probe_pair yields
  # once per gap, in scaffold order - the indexing below relies on it).
  # A nil entry marks a gap that was deliberately not explored.
  connections = []
  genome.each_gap_probe_pair(scaffold_index) do |probe1, probe2|
    unless interesting.nil? ||
        interesting.include?(probe1.number) ||
        interesting.include?(probe2.number)
      connections.push nil
      next
    end
    log.info "Gapfilling between probes #{probe1.number+1} and #{probe2.number+1}.."
    connections.push gapfiller.gapfill(master_graph, probe1.index, probe2.index, options)
  end
  log.debug "Found #{connections.compact.length} connections" if log.debug?

  all_variants = []
  num_gapfills = 0
  scaffold = genome.scaffolds[scaffold_index]
  gapfilled_sequence = scaffold.contigs[0].sequence
  connections.each_with_index do |aconn, i|
    rhs_sequence = scaffold.contigs[i+1].sequence

    if aconn.nil?
      # Unexplored gap: keep it as Ns so the following contigs stay in place
      gapfilled_sequence = gapfilled_sequence + 'N'*genome.gap_length(scaffold_index, i) + rhs_sequence
      next
    end

    gapfilled_sequence, variants, gapfilled = piece_together_gapfill(
      printer, master_graph, gapfilled_sequence, aconn, rhs_sequence, genome.gap_length(scaffold_index, i),
      options[:max_gapfill_paths]
    )
    if gapfilled
      num_gapfills += 1
      variants.each{|v| all_variants << v}
      to_log = "Filled a gap on genome #{genome.filename}: scaffold #{scaffold.name}: #{scaffold.contigs[i].scaffold_position_end+1}-#{scaffold.contigs[i+1].scaffold_position_start-1}"
      report.puts to_log
      log.info to_log
    end
  end

  if scaffold_direction == false
    gapfilled_sequence = revcom(gapfilled_sequence)
    # Move each variant onto the reverse complemented sequence
    all_variants.each do |variant|
      variant.position = gapfilled_sequence.length - variant.position
      variant.reverse!
    end
  end
  all_variants.each do |variant|
    variant.reference_name = superscaffold_name
    variants_file.puts variant.vcf(gapfilled_sequence)
  end
  return gapfilled_sequence, num_gapfills, all_variants
end
|
|
323
|
+
|
|
324
|
+
# Join two sequences across a gap, using the gapfill result when it is
# usable and a run of Ns otherwise.
#
# The gap is left as Ns when the connection has no paths at all, or has more
# than max_gapfill_paths paths. Any '-' characters (positions where the
# consensus is a gap) are removed from the joined sequence.
#
# @param printer [#ready_two_contigs_and_connections]
# @param master_graph object providing graph and velvet_sequences
# @param first_sequence [String] sequence on the left of the gap
# @param aconn connection whose paths bridge the gap
# @param second_sequence [String] sequence on the right of the gap
# @param gap_length [Integer] number of Ns to insert when not gapfilling
# @param max_gapfill_paths [Integer] most paths for which a fill is attempted
# @return [Array] three values: the joined sequence, the variants found
#   (an empty array when the gap was not filled), and true/false for whether
#   the gap was filled
def piece_together_gapfill(printer, master_graph, first_sequence, aconn, second_sequence, gap_length, max_gapfill_paths)
  num_paths = aconn.paths.length

  if num_paths == 0 || num_paths > max_gapfill_paths
    # No paths, or too many to choose between. Just fill with Ns like it was before
    scaffold_sequence = first_sequence + 'N'*gap_length + second_sequence
    variants = []
    gapfilled = false
  else
    scaffold_sequence, variants = printer.ready_two_contigs_and_connections(
      master_graph.graph, first_sequence, aconn, second_sequence, master_graph.velvet_sequences
    )
    gapfilled = true
  end

  # remove gaps i.e. where the consensus is a gap; done on a copy so the
  # string handed back by the printer is not modified in place
  return scaffold_sequence.gsub('-', ''), variants, gapfilled
end
|
|
340
|
+
|
|
341
|
+
# Reverse complement a nucleotide sequence via Bio::Sequence::NA.
#
# @param seq [String] nucleotide sequence
# @return [String] the reverse complement, upper-cased
def revcom(seq)
  nucleotides = Bio::Sequence::NA.new(seq)
  flipped = nucleotides.reverse_complement
  flipped.to_s.upcase
end
|
|
344
|
+
end
|