finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Assembler that first runs the single-ended assembly of its superclass and
# then uses paired-end information to choose between the forks that the
# single-ended assembly left unresolved.
#
# NOTE(review): the paired-end logic is work in progress - see the TODOs below.
class Bio::AssemblyGraphAlgorithms::PairedEndAssembler < Bio::AssemblyGraphAlgorithms::SingleEndedAssembler
  include Bio::FinishM::Logging

  # Assemble considering reads as possibly paired-end. Options are as per
  # SingleEndedAssembler#assemble_from, with the addition of
  # :min_insert_size: minimum length of fragment pair required to satisfy the additional
  # constraints of the paired end assembler.
  # :max_insert_size: maximum length of fragment pair.
  #
  # initial_path:: the path to start assembling from, handed to super unchanged
  # visited_nodes:: nodes already visited, handed to super. Defaults to a fresh
  #                 Set when omitted. (Previously the caller's set was silently
  #                 replaced by an empty one.)
  #
  # Returns the three values produced by super - path, visited_nodes and
  # next_neighbours - where next_neighbours has been filtered in place down to
  # the candidates accepted by #confirm_connection_backwards.
  #
  # Raises RuntimeError ("Programming error") when super hands back exactly one
  # next neighbour.
  def assemble_from(initial_path, visited_nodes = Set.new)
    # Try to assemble using single ended techniques first, and only if that
    # fails fall back to paired-end techniques.
    #
    # This used to sit inside a `while true` loop that had no exit and never
    # updated initial_path, so it could only spin forever. A single pass
    # computes everything that loop was able to compute.
    path, visited_nodes, next_neighbours = super(initial_path, visited_nodes)

    if next_neighbours.empty?
      # Nowhere to go, do nothing
      # TODO: try to jump over the gap using paired-end sequences
    elsif next_neighbours.length < 2
      # Presumably super only stops at a dead end or a fork, so a lone
      # neighbour is unexpected here - TODO confirm against SingleEndedAssembler.
      raise "Programming error"
    else
      # Choose between forks based on paired-end data
      next_neighbours.select! do |oneigh|
        confirm_connection_backwards(oneigh, path)
      end
      # TODO: extend the path with the surviving neighbour and keep assembling.
    end

    return path, visited_nodes, next_neighbours
  end

  # Return true if the backward connection is strong enough to warrant adding
  # the new_node to the current_path. Else return false
  #
  # In order to qualify as a warranted path, reads from the pair must have
  # at least 1 connection backwards at least options[:min_insert_size] backwards
  # from the end of the current path, but not more than options[:max_insert_size]
  # Connection length is the length of the read pair's insert size.
  #
  # new_onode:: candidate oriented node to append to the path
  # current_path:: the path assembled so far
  # min_insert_size:: optional override, defaults to @assembly_options[:min_insert_size]
  # max_insert_size:: optional override, defaults to @assembly_options[:max_insert_size]
  #
  # The insert size arguments are now optional: #assemble_from calls this
  # method with two arguments only, which raised ArgumentError while all four
  # were required.
  #
  # NOTE(review): the qualification check itself is not implemented yet, so
  # every candidate is currently accepted.
  def confirm_connection_backwards(new_onode, current_path, min_insert_size = nil, max_insert_size = nil)
    # Explicit arguments win, otherwise fall back to the assembly options.
    min_insert_size ||= @assembly_options[:min_insert_size]
    max_insert_size ||= @assembly_options[:max_insert_size]

    # Collect the reads that would qualify if they were in the new node
    qualifying_reads = Set.new
    current_path.reverse.each do |_node|
      # TODO: gather reads lying between min_insert_size and max_insert_size
      # back from the end of the path into qualifying_reads.
    end

    # Look at all the sequences in the new onode. Do any of them qualify
    new_onode.node.short_reads.each do |_short_read|
      # TODO: return true as soon as a read's pair is found in qualifying_reads.
    end

    # Placeholder result until the checks above are written. The original fell
    # through to the (truthy) return value of #each, so the outcome for callers
    # is unchanged, but the documented boolean contract is now honoured.
    true
  end
end
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# Finds the nodes that adjoin an oriented node, either directly through the
# de-Bruijn graph or through paired-end read linkage.
class Bio::FinishM::PairedEndNeighbourFinder
  include Bio::FinishM::Logging

  # Parameters for exploration (and code optimisation, actually). Both are
  # optional; when nil the corresponding check is skipped.
  # min_adjoining_reads:: candidate nodes linked by fewer read pairs are ignored
  # max_adjoining_node_coverage:: candidate nodes with greater coverage are ignored
  attr_accessor :min_adjoining_reads, :max_adjoining_node_coverage

  # finishm_graph:: object supplying graph, velvet_sequences and read_to_nodes
  # insert_size:: expected insert size used when estimating distances
  def initialize(finishm_graph, insert_size)
    @finishm_graph = finishm_graph
    @insert_size = insert_size
  end

  # Return an array of Neighbour objects that adjoin the given oriented node,
  # whether directly through the de-Bruijn graph or via paired-end links.
  # Each node appears once; a direct connection wins over a paired one.
  #
  # Raises (bare raise) if the same node ID is reported twice as a direct
  # neighbour.
  def neighbours(oriented_node)
    direct = oriented_node.next_neighbours(@finishm_graph.graph)
    paired = paired_neighbour_distances(oriented_node)

    by_node_id = {}
    direct.each do |onode|
      node_id = onode.node_id
      raise if by_node_id[node_id]

      neighbour = Neighbour.new
      neighbour.node = onode.node
      neighbour.first_side = onode.first_side
      neighbour.connection_type = Neighbour::DIRECT_CONNECTION
      neighbour.distance = 0
      by_node_id[node_id] = neighbour
    end

    # Paired connections only fill in nodes not already directly connected.
    paired.each do |neighbour|
      by_node_id[neighbour.node.node_id] ||= neighbour
    end

    by_node_id.values
  end

  # Return an array of Neighbour objects, one per node that carries reads
  # paired with reads of the given node. Each Neighbour's distance is the
  # estimated distance separating the two nodes.
  def paired_neighbour_distances(oriented_node)
    candidates = mate_reads_by_node(oriented_node)
    resolved = resolve_mate_reads(candidates)

    # Collate each list of reads into a list of Neighbour objects
    found = []
    resolved.each do |neighbour_node_id, read_pairs|
      # nodes paired to themselves don't count
      next if neighbour_node_id == oriented_node.node.node_id

      neighbour = paired_neighbour_from(oriented_node, neighbour_node_id, read_pairs)
      found.push neighbour unless neighbour.nil?
    end
    found
  end

  # Estimate the distance between the two nodes, assuming that the pair
  # orientation is not problematic. Gives nil when the estimate is greater
  # than 2 times insert_size or less than -1 times insert_size.
  def estimate_distance_between_nodes(node1, node2, fwd_read, rev_read, insert_size)
    fwd_contribution = node1.length_alone - fwd_read.offset_from_start_of_node + fwd_read.start_coord
    rev_contribution = node2.length_alone - rev_read.offset_from_start_of_node + rev_read.start_coord
    estimate = insert_size - fwd_contribution - rev_contribution

    return nil if estimate > insert_size*2 || estimate < -insert_size
    estimate
  end

  # An oriented node annotated with how it is connected to the node that was
  # searched from.
  class Neighbour < Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode
    PAIRED_END_CONNECTION = :paired_end_connection
    DIRECT_CONNECTION = :direct_connection

    attr_accessor :connection_type, :node, :first_side, :distance, :num_adjoining_reads

    # A [node_id, first_side] pair
    def to_settable
      [@node.node_id, @first_side]
    end

    def inspect
      "Neighbour #{object_id}: node=#{@node.node_id} first=#{@first_side} distance=#{@distance} num_adjoining_reads=#{@num_adjoining_reads} connection_type:#{@connection_type}"
    end
    alias_method :to_s, :inspect
  end

  private

  # Build a hash of node ID to Array of [first_read, second_read_id], where
  # first_read is on the current node and second_read_id is the ID of its
  # mate, which read_to_nodes places on the keyed node.
  def mate_reads_by_node(oriented_node)
    neighbour_node_read_pairs = {}
    oriented_node.node.short_reads.each do |read|
      # skip reads not going in the direction consistent with direction of travel
      consistent =
        (read.direction == true && oriented_node.first_side == Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST) ||
        (read.direction == false && oriented_node.first_side == Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST)
      next unless consistent

      pair_read_id = @finishm_graph.velvet_sequences.pair_id(read.read_id)
      next if pair_read_id.nil? # i.e. the read is not paired

      @finishm_graph.read_to_nodes[pair_read_id].each do |node_id|
        (neighbour_node_read_pairs[node_id] ||= []) << [read, pair_read_id]
      end
    end
    neighbour_node_read_pairs
  end

  # Turn the output of mate_reads_by_node into a hash of node ID to Array of
  # [fwd_noded_read, rev_noded_read], dropping nodes that fail the
  # min_adjoining_reads / max_adjoining_node_coverage thresholds.
  #
  # Raises RuntimeError if a mate read cannot be found on its node.
  def resolve_mate_reads(neighbour_node_read_pairs)
    node_ids_to_read_and_pair = {}
    neighbour_node_read_pairs.each do |neighbour_node_id, pairs|
      next if @min_adjoining_reads && pairs.length < @min_adjoining_reads

      # ignore neighbours that have too much coverage, these are likely
      # to be incorrect connections e.g. adapter contamination
      neighbour_node = @finishm_graph.graph.nodes[neighbour_node_id]
      log.debug "Found neighbour node coverage #{neighbour_node.coverage}" if log.debug?
      if @max_adjoining_node_coverage && neighbour_node.coverage > @max_adjoining_node_coverage
        log.debug "Skipping node #{neighbour_node} as it has coverage #{neighbour_node.coverage} not < #{@max_adjoining_node_coverage}" if log.debug?
        next
      end

      node_ids_to_read_and_pair[neighbour_node_id] ||= []
      pairs.each do |read, pair_read_id|
        rev_read = neighbour_node.short_reads.get_read_by_id(pair_read_id)
        if rev_read.nil?
          raise "unexpectedly didn't find read attached to node when one was expected: #{pair_read_id}"
        end
        node_ids_to_read_and_pair[neighbour_node_id] << [read, rev_read]
      end
    end
    node_ids_to_read_and_pair
  end

  # Build the paired-end Neighbour for one candidate node, or nil when no
  # read pair gives an acceptable distance estimate (or fewer than
  # min_adjoining_reads do).
  def paired_neighbour_from(oriented_node, neighbour_node_id, read_pairs)
    neighbour = Neighbour.new
    neighbour.connection_type = Neighbour::PAIRED_END_CONNECTION
    neighbour.node = @finishm_graph.graph.nodes[neighbour_node_id]
    log.debug "Setting neighbour node as #{neighbour.node}" if log.debug?

    # The orientation of the neighbour is decided by majority vote over the
    # direction of the mate reads.
    direction_vote = Hash.new(0)
    read_pairs.each { |pair| direction_vote[pair[1].direction] += 1 }
    found_direction = direction_vote.max { |a, b| a[1] <=> b[1] }[0]
    if found_direction == true
      neighbour.first_side = Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
    elsif found_direction == false
      neighbour.first_side = Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST
    end

    # Only pairs agreeing with the winning direction contribute a distance.
    accepted = []
    read_pairs.each do |pair|
      next unless found_direction == pair[1].direction

      distance = estimate_distance_between_nodes(oriented_node.node, neighbour.node, pair[0], pair[1], @insert_size)
      next if distance.nil?

      log.debug "Accepting distance #{distance} from read_id #{pair[1].read_id}" if log.debug?
      accepted << distance
    end
    num_adjoining_reads = accepted.length
    distance_sum = accepted.inject(0) { |sum, distance| sum + distance }

    # don't accept when no adjoining reads are found
    enough = num_adjoining_reads > 0 &&
      (@min_adjoining_reads.nil? || num_adjoining_reads >= @min_adjoining_reads)
    return nil unless enough

    # don't predict negative distances
    neighbour.distance = distance_sum < 0 ? 0 : distance_sum.to_f / num_adjoining_reads
    neighbour.num_adjoining_reads = num_adjoining_reads
    neighbour
  end
end
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# A graph together with the nodes, directions and reads that were recovered
# for each probe, stored as parallel arrays indexed by probe.
class Bio::FinishM::ProbedGraph
  include Bio::FinishM::Logging

  # probe_nodes, probe_node_directions and probe_node_reads are parallel
  # arrays; an entry of nil in probe_nodes means that probe was not found.
  attr_accessor :probe_nodes, :probe_node_directions, :probe_node_reads, :graph

  attr_accessor :velvet_result_directory

  # Most likely a BinarySequenceStore
  attr_accessor :velvet_sequences

  # Most likely a ReadToNode object
  attr_accessor :read_to_nodes

  # Were all the probes recovered through the process? True only when no
  # entry of probe_nodes is nil.
  def completely_probed?
    # Not `!find { |n| n.nil? }`: find returns the matching element, which is
    # itself nil, so that form is true even when probes are missing.
    @probe_nodes.none? { |node| node.nil? }
  end

  # Return the 1-based indices of the probes for which no node was found.
  def missing_probe_indices
    missings = []
    @probe_nodes.each_with_index do |probe, i|
      missings.push(i+1) if probe.nil?
    end
    return missings
  end

  # Make a Bio::Velvet::Graph::OrientedNodeTrail with just one
  # step in it - the node that corresponds to the probe_index
  # (used directly as an index into probe_nodes, i.e. 0-based).
  #
  # Raises RuntimeError if no node was found for that probe.
  def initial_path_from_probe(probe_index)
    initial_path = Bio::Velvet::Graph::OrientedNodeTrail.new
    node = @probe_nodes[probe_index]
    raise "No node found for probe #{probe_index}" if node.nil?
    direction = @probe_node_directions[probe_index]

    way = direction ?
      Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST :
      Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST
    initial_path.add_node node, way
    return initial_path
  end

  # Return a Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode
  # corresponding to the index of the probe and its direction, or nil if no
  # node was found for that probe.
  def velvet_oriented_node(probe_index)
    return nil if @probe_nodes[probe_index].nil?

    return initial_path_from_probe(probe_index)[0]
  end

  # The leash is the number of base pairs from the start of the probe,
  # but the path finding algorithm simply uses the combined length of all
  # the nodes without reference to the actual probe sequence. So if the
  # probe is near the end of a long node, then path finding may fail.
  # So adjust the leash length to account for this (or keep the nil
  # if the starting_leash_length is nil)
  def adjusted_leash_length(probe_index, starting_leash_length)
    return nil if starting_leash_length.nil?

    read = @probe_node_reads[probe_index]
    return read.offset_from_start_of_node+starting_leash_length
  end

  # Return a new ProbedGraph that is the same as the current one
  # except that only probes specified in the given probe_indices enumerable
  # are accepted. The graph, sequences and read-to-node lookup are shared
  # with (not copied from) the current object.
  #
  # NOTE(review): indices here are 1-based (i-1 is used to index the arrays),
  # matching missing_probe_indices but unlike initial_path_from_probe.
  def subgraph(probe_indices)
    to_return = Bio::FinishM::ProbedGraph.new
    to_return.graph = @graph
    to_return.velvet_result_directory = @velvet_result_directory
    to_return.velvet_sequences = @velvet_sequences
    # Needed by paired_nodes, which would otherwise fail on the subgraph.
    to_return.read_to_nodes = @read_to_nodes

    to_return.probe_nodes = []
    to_return.probe_node_directions = []
    to_return.probe_node_reads = []
    probe_indices.each do |i|
      to_return.probe_nodes.push @probe_nodes[i-1]
      to_return.probe_node_directions.push @probe_node_directions[i-1]
      to_return.probe_node_reads.push @probe_node_reads[i-1]
    end

    return to_return
  end

  # Return a list of node IDs that are connected through paired-end linkages.
  # This method probably belongs in the Node class except that that is
  # in bio-velvet and yet requires sequence_id_to_node_ids_hash. If all reads are
  # single ended then this method always returns []
  def paired_nodes(node)
    to_return_node_ids = Set.new
    log.debug "Found #{node.short_reads.length} short reads associated with node #{node}" if log.debug?
    node.short_reads.each do |read|
      pair_read_id = @velvet_sequences.pair_id(read.read_id)
      next if pair_read_id.nil? # i.e. the read is not paired

      @read_to_nodes[pair_read_id].each do |node_id|
        to_return_node_ids << node_id
      end
    end
    # De-duplicated node IDs (not node objects) are returned
    return to_return_node_ids.to_a
  end
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# A class representing reads or sets of reads to be assembled. Each kind of
# input (singles / interleaved pairs / separate pairs, FASTA / FASTQ,
# optionally gzipped) has its own accessor holding the paths given for it.
class Bio::FinishM::ReadInput
  READ_INPUT_SYMBOLS = [
    :fasta_singles, :fastq_singles, :fasta_singles_gz, :fastq_singles_gz,
    :interleaved_fasta, :interleaved_fastq, :interleaved_fasta_gz, :interleaved_fastq_gz,
    :separate_fasta, :separate_fastq, :separate_fasta_gz, :separate_fastq_gz,
  ]
  attr_accessor(*READ_INPUT_SYMBOLS)

  # Given an OptionParser, register one read-related flag per input kind.
  # Each flag takes a comma separated list of paths, which is stored in the
  # given options hash under the matching READ_INPUT_SYMBOLS key.
  def add_options(option_parser, options)
    flag_to_symbol = {
      '--fasta' => :fasta_singles,
      '--fastq' => :fastq_singles,
      '--fasta-gz' => :fasta_singles_gz,
      '--fastq-gz' => :fastq_singles_gz,
      '--interleaved-fasta' => :interleaved_fasta,
      '--interleaved-fastq' => :interleaved_fastq,
      '--interleaved-fasta-gz' => :interleaved_fasta_gz,
      '--interleaved-fastq-gz' => :interleaved_fastq_gz,
      '--separate-fasta' => :separate_fasta,
      '--separate-fastq' => :separate_fastq,
      '--separate-fasta-gz' => :separate_fasta_gz,
      '--separate-fastq-gz' => :separate_fastq_gz,
    }
    flag_to_symbol.each do |flag, sym|
      option_parser.on("#{flag} PATH", Array, "One or more paths to reads, comma separated") do |paths|
        options[sym] = paths
      end
    end
  end

  # Require at least 1 set of reads to be given, of any type. Gives nil when
  # the options are acceptable, otherwise an error message String.
  # argv is accepted but not inspected.
  def validate_options(options, argv)
    return nil if options[:previous_assembly] #bit of a hack, but hey
    return nil if READ_INPUT_SYMBOLS.any? { |sym| options[sym] }

    "No definition of reads for assembly was found"
  end

  # Copy every read-related entry present in the options hash into the
  # corresponding attribute of this object
  def parse_options(options)
    READ_INPUT_SYMBOLS.each do |sym|
      given = options[sym]
      send("#{sym}=", given) if given
    end
  end

  # Output a string to be used on the command line with velvet: for each
  # input kind that has paths, the velvet flags for that kind followed by
  # its paths, every token preceded by a single space.
  def velvet_read_arguments
    # Have to put probe sequences (which are single-ended) first in this (ordered) hash
    symbol_to_velvet_flag = {
      :fasta_singles => '-fasta -short',
      :fastq_singles => '-fastq -short',
      :fasta_singles_gz => '-fasta.gz -short',
      :fastq_singles_gz => '-fastq.gz -short',
      :interleaved_fasta => '-fasta -shortPaired',
      :interleaved_fastq => '-fastq -shortPaired',
      :interleaved_fasta_gz => '-fasta.gz -shortPaired',
      :interleaved_fastq_gz => '-fastq.gz -shortPaired',
      :separate_fasta => '-fasta -shortPaired -separate',
      :separate_fastq => '-fastq -shortPaired -separate',
      :separate_fasta_gz => '-fasta.gz -shortPaired -separate',
      :separate_fastq_gz => '-fastq.gz -shortPaired -separate',
    }
    symbol_to_velvet_flag.inject('') do |args, (sym, velvet_flag)|
      paths = send(sym)
      if paths.nil? || paths.empty?
        args
      else
        paths.inject(args + " #{velvet_flag}") { |so_far, path| so_far + " #{path}" }
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Lookup from a read ID to the IDs of the nodes that include that read,
# backed by a table loaded through Bio::FinishM::VelvetCBinding.
class Bio::FinishM::ReadToNode
  include Bio::FinishM::Logging

  # Load the lookup table from the given file.
  #
  # Raises RuntimeError if the file does not exist.
  def initialize(filename)
    @bindings = Bio::FinishM::VelvetCBinding.new
    log.debug "Reading ReadToNode file #{filename}.."
    unless File.exist?(filename)
      raise "Unable to find readToNode binary file"
    end
    @read_to_node = @bindings.read_read_id_to_node_id_lookup_table(filename)
    log.debug "Finished reading ReadToNode file"
    # read_id => Array of node IDs, filled lazily by #[]
    @cache = {}
  end

  # Return an array of node IDs that include the given read id. Results are
  # memoised per read_id, so repeated lookups return the same Array object.
  def [](read_id)
    remembered = @cache[read_id]
    return remembered unless remembered.nil?

    # The binding hands back a ReadIdToNodeIdIndexation, i.e. in C:
    #   typedef struct {
    #     IDnum num_nodes;
    #     ReadIdNodeId* read_ids_node_ids;
    #   } ReadIdToNodeIdIndexation;
    indexation = @bindings.get_read_id_to_node_id_indexation(@read_to_node, read_id)

    entry_struct = Bio::FinishM::VelvetCBinding::ReadIdNodeIdStruct
    entries = FFI::Pointer.new(entry_struct, indexation[:read_ids_node_ids].pointer)
    node_ids = (0...indexation[:num_nodes]).map do |i|
      # abs: the sign of the stored node ID is discarded
      entry_struct.new(entries[i])[:node_id].abs
    end

    @cache[read_id] = node_ids
  end
end
|