finishm 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.rspec +1 -0
- data/Gemfile +31 -0
- data/LICENSE.txt +20 -0
- data/README.md +59 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/bin/assembly_visualiser +106 -0
- data/bin/check_primer_combinations.rb +73 -0
- data/bin/contig_joiner.rb +244 -0
- data/bin/contigs_against_assembly.rb +153 -0
- data/bin/finishm +143 -0
- data/bin/finishm_assembler +55 -0
- data/bin/finishm_gap_closer.rb +241 -0
- data/bin/kmer_abundance_file_tool.rb +49 -0
- data/bin/kmer_pattern_to_assembly.rb +377 -0
- data/bin/kmer_profile_finder.rb +92 -0
- data/bin/kmers_count_parse.d +52 -0
- data/bin/kmers_count_tabulate.d +123 -0
- data/bin/kmers_count_tabulate.rb +84 -0
- data/bin/pcr_result_parser.rb +108 -0
- data/bin/primer_finder.rb +119 -0
- data/bin/read_selection_by_kmer.d +174 -0
- data/bin/scaffold_by_pattern.rb +119 -0
- data/bin/scaffold_connection_possibilities_to_knowns.rb +193 -0
- data/bin/scaffold_end_coverages.rb +69 -0
- data/bin/trail_validator.rb +84 -0
- data/ext/mkrf_conf.rb +56 -0
- data/ext/src/Makefile +140 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2643 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +154 -0
- data/ext/src/src/graph.c +3932 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/graphStructures.h +52 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/probe_node_finder.c +84 -0
- data/ext/src/src/probe_node_finder.h +6 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/readToNode.c +218 -0
- data/ext/src/src/readToNode.h +35 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +744 -0
- data/ext/src/src/runReadToNode.c +29 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/compress.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/example +0 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/infback.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/libz.a +0 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip +0 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/trees.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.o +0 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.o +0 -0
- data/lib/assembly/a_b_visualiser.rb +169 -0
- data/lib/assembly/acyclic_connection_finder.rb +81 -0
- data/lib/assembly/all_orfs.rb +615 -0
- data/lib/assembly/bad_format_writer.rb +46 -0
- data/lib/assembly/bam_probe_read_selector.rb +48 -0
- data/lib/assembly/bubbly_assembler.rb +842 -0
- data/lib/assembly/c_probe_node_finder.rb +38 -0
- data/lib/assembly/connection_interpreter.rb +350 -0
- data/lib/assembly/contig_printer.rb +400 -0
- data/lib/assembly/coverage_based_graph_filter.rb +68 -0
- data/lib/assembly/depth_first_search.rb +63 -0
- data/lib/assembly/dijkstra.rb +216 -0
- data/lib/assembly/fluffer.rb +253 -0
- data/lib/assembly/graph_explorer.rb +85 -0
- data/lib/assembly/graph_generator.rb +315 -0
- data/lib/assembly/height_finder.rb +355 -0
- data/lib/assembly/hybrid_velvet_graph.rb +70 -0
- data/lib/assembly/input_genome.rb +182 -0
- data/lib/assembly/kmer_coverage_based_path_filter.rb +65 -0
- data/lib/assembly/node_finder.rb +171 -0
- data/lib/assembly/oriented_node_trail.rb +507 -0
- data/lib/assembly/paired_end_assembler.rb +53 -0
- data/lib/assembly/paired_end_neighbour_finder.rb +176 -0
- data/lib/assembly/probed_graph.rb +105 -0
- data/lib/assembly/read_input.rb +79 -0
- data/lib/assembly/read_to_node.rb +37 -0
- data/lib/assembly/scaffold_breaker.rb +126 -0
- data/lib/assembly/sequence_hasher.rb +71 -0
- data/lib/assembly/single_coherent_paths_between_nodes.rb +533 -0
- data/lib/assembly/single_coherent_wanderer.rb +261 -0
- data/lib/assembly/single_ended_assembler.rb +441 -0
- data/lib/assembly/velvet_c_binding.rb +54 -0
- data/lib/assembly/velvet_graph_sequence_extractor.rb +123 -0
- data/lib/external/VERSION +1 -0
- data/lib/finishm/assemble.rb +224 -0
- data/lib/finishm/explore.rb +217 -0
- data/lib/finishm/finisher.rb +303 -0
- data/lib/finishm/fluff.rb +122 -0
- data/lib/finishm/gapfiller.rb +325 -0
- data/lib/finishm/orfs_finder.rb +88 -0
- data/lib/finishm/path_counter.rb +90 -0
- data/lib/finishm/primers.rb +425 -0
- data/lib/finishm/primers_check.rb +176 -0
- data/lib/finishm/roundup.rb +344 -0
- data/lib/finishm/sequence.rb +142 -0
- data/lib/finishm/visualise.rb +430 -0
- data/lib/finishm/wander.rb +270 -0
- data/lib/kmer_abundance_pattern.rb +79 -0
- data/lib/kmer_multi_abundance_file.rb +48 -0
- data/lib/oligo_designer.rb +88 -0
- data/lib/priner.rb +66 -0
- data/spec/acyclic_connection_finder_spec.rb +551 -0
- data/spec/all_orfs_spec.rb +443 -0
- data/spec/assemble_spec.rb +186 -0
- data/spec/bubbly_assembler_spec.rb +707 -0
- data/spec/c_node_finder_spec.rb +58 -0
- data/spec/connection_interpreter_spec.rb +284 -0
- data/spec/contig_printer_spec.rb +291 -0
- data/spec/coverage_based_graph_filter_spec.rb +102 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam +0 -0
- data/spec/data/6_3e4e5e6e.1vANME.bam.bai +0 -0
- data/spec/data/acyclic_connection_finder/1/probes.fa +5 -0
- data/spec/data/acyclic_connection_finder/1/random1.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random1.sammy.fa.gz +0 -0
- data/spec/data/acyclic_connection_finder/1/random2.fa +2 -0
- data/spec/data/acyclic_connection_finder/1/random2.sammy.fa.gz +0 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/random3000.slightly_changed.fa +39 -0
- data/spec/data/assembly/1_simple_bubble_uneven_coverage/reads_combined.fa.gz +0 -0
- data/spec/data/assembly_visualiser/Contig_6_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Contig_7_1_to_250.fa.kmers31 +220 -0
- data/spec/data/assembly_visualiser/Graph +46 -0
- data/spec/data/assembly_visualiser/start_kmers1 +2 -0
- data/spec/data/bands.csv +1 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq +0 -0
- data/spec/data/c_probe_node_finder/1/CnyUnifiedSeq.names +544 -0
- data/spec/data/c_probe_node_finder/1/Graph2 +668 -0
- data/spec/data/c_probe_node_finder/1/LastGraph +668 -0
- data/spec/data/c_probe_node_finder/1/Log +756 -0
- data/spec/data/c_probe_node_finder/1/PreGraph +11 -0
- data/spec/data/c_probe_node_finder/1/Roadmaps +2009 -0
- data/spec/data/c_probe_node_finder/1/contigs.fa +29 -0
- data/spec/data/c_probe_node_finder/1/stats.txt +6 -0
- data/spec/data/contig_printer/1/HOWTO_RECREATE +17 -0
- data/spec/data/contig_printer/1/contigs.fa +4 -0
- data/spec/data/contig_printer/1/seq.fa +2408 -0
- data/spec/data/contig_printer/1/seq.fa.svg +153 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Graph2 +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/LastGraph +2953 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Log +21 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/PreGraph +27 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Roadmaps +5182 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/Sequences +3612 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/contigs.fa +36 -0
- data/spec/data/contig_printer/1/seq.fa.velvet/stats.txt +14 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam +0 -0
- data/spec/data/contig_printer/1/seq.faVseq2_1to550.fa.bam.bai +0 -0
- data/spec/data/contig_printer/1/seq.node12.fa +4 -0
- data/spec/data/contig_printer/1/seq1_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa +2 -0
- data/spec/data/contig_printer/1/seq2_1to550.fa.fai +1 -0
- data/spec/data/explore/1/2seqs.sammy.fa +12004 -0
- data/spec/data/explore/1/HOWTO_RECREATE.txt +6 -0
- data/spec/data/explore/1/a.fa +2 -0
- data/spec/data/explore/1/seq1_and_a.fa +3 -0
- data/spec/data/explore/1/seq2.fa +2 -0
- data/spec/data/fluff/1/2seqs.sammy.fa +12004 -0
- data/spec/data/fluff/1/HOWTO_RECREATE.txt +5 -0
- data/spec/data/fluff/1/seq1.fa +2 -0
- data/spec/data/fluff/1/seq2.fa +2 -0
- data/spec/data/gapfilling/1/reads.fa +171 -0
- data/spec/data/gapfilling/1/trail_with_Ns.fa +5 -0
- data/spec/data/gapfilling/1/velvetAssembly/Graph2 +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/LastGraph +130 -0
- data/spec/data/gapfilling/1/velvetAssembly/Log +199 -0
- data/spec/data/gapfilling/1/velvetAssembly/PreGraph +7 -0
- data/spec/data/gapfilling/1/velvetAssembly/Roadmaps +239 -0
- data/spec/data/gapfilling/1/velvetAssembly/Sequences +281 -0
- data/spec/data/gapfilling/1/velvetAssembly/contigs.fa +12 -0
- data/spec/data/gapfilling/1/velvetAssembly/stats.txt +4 -0
- data/spec/data/gapfilling/2/HOWTO_recreate +17 -0
- data/spec/data/gapfilling/2/reference.fa +2 -0
- data/spec/data/gapfilling/2/reference_part1.fa +4 -0
- data/spec/data/gapfilling/2/reference_part2.fa +4 -0
- data/spec/data/gapfilling/2/sammy_reads.fa.gz +0 -0
- data/spec/data/gapfilling/2/with_gaps.fa +4 -0
- data/spec/data/gapfilling/3/HOWTO_recreate +4 -0
- data/spec/data/gapfilling/3/reads.fa.gz +0 -0
- data/spec/data/gapfilling/3/reference_part1.fa +4 -0
- data/spec/data/gapfilling/3/reference_part2.fa +4 -0
- data/spec/data/gapfilling/3/with_gaps.fa +4 -0
- data/spec/data/gapfilling/4/HOWTO_recreate +1 -0
- data/spec/data/gapfilling/4/reads.fa.gz +0 -0
- data/spec/data/gapfilling/5/HOWTO_RECREATE +7 -0
- data/spec/data/gapfilling/5/answer.fna +2 -0
- data/spec/data/gapfilling/5/gappy.fna +2 -0
- data/spec/data/gapfilling/5/reads.fa +17961 -0
- data/spec/data/gapfilling/5/velvet51_3.5/LastGraph +8337 -0
- data/spec/data/gapfilling/5/velvet51_3.5/Sequences +20921 -0
- data/spec/data/gapfilling/6/random1.fa +28 -0
- data/spec/data/gapfilling/6/random2.fa +28 -0
- data/spec/data/gapfilling/6/random_sequence_length_2000 +0 -0
- data/spec/data/gapfilling/6/reads.random1.fa.gz +0 -0
- data/spec/data/gapfilling/6/reads.random2.fa.gz +0 -0
- data/spec/data/gapfilling/6/to_gapfill.fa +22 -0
- data/spec/data/kmer_profile_to_assembly/multiple_abundance_file1.csv +2 -0
- data/spec/data/kmers_count1.csv +2 -0
- data/spec/data/kmers_count2.csv +3 -0
- data/spec/data/out +3 -0
- data/spec/data/positive_latching_pair.fa +2 -0
- data/spec/data/primers.csv +4 -0
- data/spec/data/read_selection_by_kmer/blacklist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/input.fasta +6 -0
- data/spec/data/read_selection_by_kmer/whitelist1.txt +1 -0
- data/spec/data/read_selection_by_kmer/whitelist2.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/HOWTO_RECREATE.txt +2 -0
- data/spec/data/read_to_node/1_a_graph/LastGraph +6695 -0
- data/spec/data/read_to_node/1_a_graph/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/2_no_read256_or_259/HOWTO_RECREATE.txt +3 -0
- data/spec/data/read_to_node/2_no_read256_or_259/LastGraph +6693 -0
- data/spec/data/read_to_node/2_no_read256_or_259/ReadToNode.bin +0 -0
- data/spec/data/read_to_node/3_no_last_read/LastGraph +6694 -0
- data/spec/data/read_to_node/3_no_last_read/ReadToNode.bin +0 -0
- data/spec/data/t/details.txt +5 -0
- data/spec/data/t/details.txt.srt +5 -0
- data/spec/data/t/location.txt +3 -0
- data/spec/data/t/location.txt.srt +3 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/answer.fa +2 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/reads.fa.gz +0 -0
- data/spec/data/tweak/1_gap_then_unscaffolded/scaffolds.fa +6 -0
- data/spec/data/tweak/2_second_genome/answer2.fa +2 -0
- data/spec/data/tweak/2_second_genome/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/answer.fa +2 -0
- data/spec/data/tweak/3_variant/lesser_answer.fa +2 -0
- data/spec/data/tweak/3_variant/reads.fa.gz +0 -0
- data/spec/data/tweak/3_variant/with_gaps.fa +2 -0
- data/spec/data/velvet_test_trails/Assem/Graph +17 -0
- data/spec/data/velvet_test_trails/Assem/Graph2 +40 -0
- data/spec/data/velvet_test_trails/Assem/LastGraph +40 -0
- data/spec/data/velvet_test_trails/Assem/Log +35 -0
- data/spec/data/velvet_test_trails/Assem/PreGraph +9 -0
- data/spec/data/velvet_test_trails/Assem/Roadmaps +89 -0
- data/spec/data/velvet_test_trails/Assem/Sequences +50 -0
- data/spec/data/velvet_test_trails/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails/Assem/contigs.fa +15 -0
- data/spec/data/velvet_test_trails/Assem/stats.txt +5 -0
- data/spec/data/velvet_test_trails/node_fwds.fa +8 -0
- data/spec/data/velvet_test_trails/node_seqs.fa +9 -0
- data/spec/data/velvet_test_trails/nodes_fwd_rev.fa +16 -0
- data/spec/data/velvet_test_trails/read1.fa +2 -0
- data/spec/data/velvet_test_trails/reads.fa +50 -0
- data/spec/data/velvet_test_trails_reverse/Assem/LastGraph +17 -0
- data/spec/data/velvet_test_trails_reverse/Assem/a.svg +53 -0
- data/spec/data/velvet_test_trails_reverse/reads_reversed.fa +10 -0
- data/spec/data/visualise/1/LastGraph +6695 -0
- data/spec/data/visualise/2_paired_end/HOWTO_RECREATE.txt +10 -0
- data/spec/data/visualise/2_paired_end/rand1.fa +2 -0
- data/spec/data/visualise/2_paired_end/rand2.fa +2 -0
- data/spec/data/visualise/2_paired_end/with_gaps.fa +8 -0
- data/spec/data/visualise/2_paired_end/with_gaps.read_pairs.fa.gz +0 -0
- data/spec/data/wander/1/random1.fa +2 -0
- data/spec/data/wander/1/random1.sammy.fa +804 -0
- data/spec/depth_first_search_spec.rb +190 -0
- data/spec/dijkstra_spec.rb +143 -0
- data/spec/explore_spec.rb +29 -0
- data/spec/fluffer_spec.rb +155 -0
- data/spec/gapfiller_spec.rb +107 -0
- data/spec/graph_explorer_spec.rb +475 -0
- data/spec/graph_generator_spec.rb +99 -0
- data/spec/height_finder_spec.rb +306 -0
- data/spec/kmer_abundance_pattern_spec.rb +56 -0
- data/spec/kmer_coverage_based_path_filter_spec.rb +73 -0
- data/spec/kmer_profile_finder_spec.rb +38 -0
- data/spec/kmers_count_tabulate_spec.rb +120 -0
- data/spec/oriented_node_trail_spec.rb +221 -0
- data/spec/paired_end_neighbours_spec.rb +126 -0
- data/spec/paths_between_nodes_spec.rb +349 -0
- data/spec/priner_spec.rb +7 -0
- data/spec/read_input_spec.rb +23 -0
- data/spec/read_selection_by_kmer_spec.rb +166 -0
- data/spec/read_to_node_spec.rb +35 -0
- data/spec/roundup_spec.rb +366 -0
- data/spec/scaffold_breaker_spec.rb +144 -0
- data/spec/sequence_spec.rb +43 -0
- data/spec/single_coherent_paths_between_nodes_spec.rb +492 -0
- data/spec/single_coherent_wanderer_spec.rb +120 -0
- data/spec/single_ended_assembler_spec.rb +398 -0
- data/spec/spec_helper.rb +310 -0
- data/spec/velvet_graph_sequence_extractor_spec.rb +80 -0
- data/spec/visualise_spec.rb +105 -0
- data/spec/wander_spec.rb +119 -0
- data/spec/watch_for_changes.sh +16 -0
- data/validation/fasta_compare.rb +72 -0
- data/validation/gapfill_simulate_perfect.rb +108 -0
- metadata +899 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
class Bio::FinishM::ORFsFinder
|
|
2
|
+
include Bio::FinishM::Logging
|
|
3
|
+
|
|
4
|
+
# Default option values contributed by this command; merged into the caller's
# options hash by add_options. Frozen because it is only ever read here, and a
# shared defaults hash must not be modified in place by accident.
DEFAULT_OPTIONS = {
  :min_orf_length => 100
}.freeze
|
|
7
|
+
|
|
8
|
+
# Sets up the command-line interface for `finishm find_orfs`.
#
# optparse_object - option parser that receives the banner, the section
#                   separators and the options registered by the helper classes.
# options         - Hash that is updated in place with default values.
#
# Returns whatever the final helper call (GraphGenerator#add_options) returns.
def add_options(optparse_object, options)
  # merge! overwrites keys already present in options; this class's own
  # defaults are applied second, so they take precedence over Visualise's.
  options.merge!(Bio::FinishM::Visualise::DEFAULT_OPTIONS)
  options.merge!(DEFAULT_OPTIONS)

  # NOTE: the three lines following the assignment are part of the string
  # literal itself, so they must not be re-indented.
  optparse_object.banner = "\nUsage: finishm find_orfs --assembly-???

Find possible open reading frames in assembly graph
\n\n"

  optparse_object.separator("Input genome information")
  # TODO improve this help
  optparse_object.separator("\nIf an assembly is to be done, there must be some definition of reads:\n\n")
  read_options = Bio::FinishM::ReadInput.new
  read_options.add_options(optparse_object, options)

  optparse_object.separator("\nOptional graph-exploration arguments:\n\n")
  probe_options = Bio::FinishM::Visualise.new
  probe_options.add_probe_options(optparse_object, options)

  optparse_object.separator("\nOptional graph-related arguments:\n\n")
  graph_options = Bio::FinishM::GraphGenerator.new
  graph_options.add_options(optparse_object, options)
end
|
|
26
|
+
|
|
27
|
+
def validate_options(options, argv)
|
|
28
|
+
visualise = Bio::FinishM::Visualise.new
|
|
29
|
+
return visualise.validate_argv_length(argv) ||
|
|
30
|
+
visualise.validate_probe_options(options) ||
|
|
31
|
+
visualise.validate_assembly_options(options)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def run(options, argv)
|
|
35
|
+
read_input = Bio::FinishM::ReadInput.new
|
|
36
|
+
read_input.parse_options options
|
|
37
|
+
|
|
38
|
+
visualise = Bio::FinishM::Visualise.new
|
|
39
|
+
|
|
40
|
+
if options[:interesting_probes] or options[:interesting_probe_names]
|
|
41
|
+
finishm_graph, interesting_node_ids = visualise.generate_graph_from_probes(read_input, options)
|
|
42
|
+
elsif options[:interesting_nodes]
|
|
43
|
+
finishm_graph = visualise.generate_graph_from_nodes(read_input, options)
|
|
44
|
+
interesting_node_ids = options[:interesting_nodes]
|
|
45
|
+
elsif options[:assembly_files]
|
|
46
|
+
finishm_graph, interesting_node_ids, = visualise.generate_graph_from_assembly(read_input, options)
|
|
47
|
+
else
|
|
48
|
+
finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
if options[:graph_search_leash_length]
|
|
52
|
+
#log.info "Finding nodes within the leash length of #{options[:graph_search_leash_length] }.."
|
|
53
|
+
nodes_within_leash, node_ids_at_leash = visualise.get_nodes_within_leash(finishm_graph, interesting_node_ids, options)
|
|
54
|
+
log.info "Found #{node_ids_at_leash.length} nodes at the end of the #{options[:graph_search_leash_length] }bp leash" if options[:graph_search_leash_length]
|
|
55
|
+
|
|
56
|
+
options[:range] = nodes_within_leash
|
|
57
|
+
else
|
|
58
|
+
options[:range] = finishm_graph.graph.nodes
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
initial_onodes = Bio::FinishM::PathCounter.new.get_leash_start_nodes(finishm_graph, options[:range])
|
|
62
|
+
find_orfs_in_graph(finishm_graph, initial_onodes, options)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def find_orfs_in_graph(finishm_graph, initial_onodes, options={})
|
|
66
|
+
initial_paths = initial_onodes.collect do |onode|
|
|
67
|
+
path = Bio::Velvet::Graph::OrientedNodeTrail.new
|
|
68
|
+
path.add_oriented_node onode
|
|
69
|
+
path
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
orfer = Bio::AssemblyGraphAlgorithms::AllOrfsFinder.new
|
|
73
|
+
orf_trails = orfer.find_orfs_in_graph(finishm_graph.graph, initial_paths,
|
|
74
|
+
options[:min_orf_length], options[:range])
|
|
75
|
+
|
|
76
|
+
found_orfs = orfer.orf_sequences_from_trails(orf_trails)
|
|
77
|
+
|
|
78
|
+
found_orfs.each_pair do |name, sequence|
|
|
79
|
+
puts ">#{name}"
|
|
80
|
+
puts sequence
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def orf_to_settable(path, start_index, start_offset, end_index, end_offset)
|
|
86
|
+
[path[start_index..end_index].collect{|onode| onode.to_settable},[start_offset, end_offset]]
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Command-line mode ("finishm count_paths" in the usage banner) that builds or
# loads an assembly graph and prints the minimum and maximum number of paths
# through it, as computed by HeightFinder.
class Bio::FinishM::PathCounter
  include Bio::FinishM::Logging

  # Install the Visualise defaults into +options+ and register the
  # command-line flags on +optparse_object+ (delegated to ReadInput,
  # Visualise probe options and GraphGenerator).
  def add_options(optparse_object, options)
    options.merge! Bio::FinishM::Visualise::DEFAULT_OPTIONS
    optparse_object.banner = "\nUsage: finishm count_paths --assembly-???

Count paths through assembly graph
\n\n"

    optparse_object.separator "Input genome information"
    optparse_object.separator "\nIf an assembly is to be done, there must be some definition of reads:\n\n" #TODO improve this help
    Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

    optparse_object.separator "\nOptional graph-exploration arguments:\n\n"
    Bio::FinishM::Visualise.new.add_probe_options(optparse_object, options)

    optparse_object.separator "\nOptional graph-related arguments:\n\n"
    Bio::FinishM::GraphGenerator.new.add_options(optparse_object, options)
  end

  # Run the Visualise validators in order and return the first truthy result
  # (or the result of the last validator when none is truthy).
  def validate_options(options, argv)
    #TODO: give a better description of the error that has occurred
    #TODO: require reads options
    visualise = Bio::FinishM::Visualise.new
    visualise.validate_argv_length(argv) ||
      visualise.validate_probe_options(options) ||
      visualise.validate_assembly_options(options)
  end

  # Build the graph according to whichever of the probe / node / assembly
  # options was supplied, restrict the range with the leash length if one was
  # given, then count paths through that range.
  def run(options, argv)
    read_input = Bio::FinishM::ReadInput.new
    read_input.parse_options options

    visualise = Bio::FinishM::Visualise.new

    if options[:interesting_probes] || options[:interesting_probe_names]
      finishm_graph, interesting_node_ids, = visualise.generate_graph_from_probes(read_input, options)
      if options[:probe_to_node_map]
        # Output probe map if asked
        visualise.write_probe_to_node_map(options[:probe_to_node_map], finishm_graph, options[:interesting_probes])
      end
    elsif options[:interesting_nodes]
      finishm_graph = visualise.generate_graph_from_nodes(read_input, options)
      # Must be stored under the name the leash search below reads; the
      # previous name (interesting_nodes) left interesting_node_ids nil.
      interesting_node_ids = options[:interesting_nodes]
    elsif options[:assembly_files]
      finishm_graph, interesting_node_ids, = visualise.generate_graph_from_assembly(read_input, options)
    else
      # NOTE(review): interesting_node_ids stays nil here; confirm
      # get_nodes_within_leash copes with that when a leash length is also set.
      finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, options)
    end

    if options[:graph_search_leash_length]
      # get a list of the nodes to be visualised given the leash length
      nodes_within_leash, node_ids_at_leash = visualise.get_nodes_within_leash(finishm_graph, interesting_node_ids, options)
      log.info "Found #{node_ids_at_leash.length} nodes at the end of the #{options[:graph_search_leash_length] }bp leash"
    else
      nodes_within_leash = finishm_graph.graph.nodes
    end

    initial_onodes = get_leash_start_nodes(finishm_graph, nodes_within_leash)

    log.info "Counting paths through assembly graph.."
    count_paths_through_graph(finishm_graph, initial_onodes, :range => nodes_within_leash )
  end

  # Return the oriented nodes from which a traversal of +nodes_within_leash+
  # should begin, as reported by HeightFinder#find_oriented_edge_of_range.
  # When that comes back empty, a single arbitrary node is used instead.
  def get_leash_start_nodes(finishm_graph, nodes_within_leash)
    log.info "Finding nodes from which to begin search.."
    start_onodes, = Bio::AssemblyGraphAlgorithms::HeightFinder.new.find_oriented_edge_of_range(finishm_graph.graph, nodes_within_leash)
    if start_onodes.empty?
      #possible with a completely cyclic graph, choose any node to begin
      # `OrientedNode x, true` without `.new` is parsed by Ruby as a call to
      # a method named OrientedNode, so construct the object explicitly.
      # NOTE(review): assumes OrientedNode#initialize accepts
      # (node, first_side) - confirm against the OrientedNodeTrail source.
      onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(finishm_graph.graph.nodes[1], true)
      start_onodes = [onode]
      log.info "Graph appears to be a self-contained loop, so chose an arbitrary node"
    else
      log.info "Found #{start_onodes.length} nodes"
    end
    start_onodes
  end

  # Traverse the graph from +initial_onodes+ (options are passed straight to
  # HeightFinder#traverse) and print the min/max path counts to stdout.
  def count_paths_through_graph(finishm_graph, initial_onodes, options={})
    height_finder = Bio::AssemblyGraphAlgorithms::HeightFinder.new

    by_height, = height_finder.traverse(finishm_graph.graph, initial_onodes, options)
    min_paths_through = height_finder.min_paths_through(by_height)
    max_paths_through = height_finder.max_paths_through(by_height)
    puts "Minimum number of distinct sequences to explain graph, assuming no errors: #{min_paths_through}."
    puts "Maximum number of distinct sequences allowed by graph: #{max_paths_through}."
  end
end
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
require 'optparse'
|
|
2
|
+
require 'tempfile'
|
|
3
|
+
|
|
4
|
+
require 'bio-logger'
|
|
5
|
+
require 'bio'
|
|
6
|
+
require 'progressbar'
|
|
7
|
+
require 'bio-ipcress'
|
|
8
|
+
$:.unshift File.join(ENV['HOME'],'git','bioruby-primer3','lib')
|
|
9
|
+
require 'bio-primer3'
|
|
10
|
+
|
|
11
|
+
class Bio::FinishM::Primers
|
|
12
|
+
include Bio::FinishM::Logging
|
|
13
|
+
|
|
14
|
+
# Install this mode's defaults into +options+ and register its command-line
# flags on +optparse_object+.
#
# Handlers store parsed values in +options+ under :contigs_file,
# :min_distance, :max_distance, :melting_temperature_optimum,
# :contig_universe, :persevere and :extra_global_primer3_options.
#
# Raises ArgumentError from a handler when a numeric flag is out of range,
# and RuntimeError when --primer3-options contains a part that is not of the
# form NAME=VALUE.
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm primers --contigs <contig_file> --min-distance-from-contig-ends <num_bases> --max-distance-from-contig-ends <num_bases> [options]

Takes a collection of contigs that are assumed to be a single circular genome. Then designs primers off the ends of each of the contigs
for use in a PCR reaction such that if all primers were included all the PCR reaction all the gaps would be amplified.
\n\n"

  options.merge!({
    :logger => 'stderr',
    :logger_trace_level => 'info',
    :melting_temperature_optimum => nil,
    :melting_temperature_tolerance => 2,
    :min_primer_size => 15,
    :extra_global_primer3_options => {
      'PRIMER_MAX_POLY_X' => 4,
    },
    :persevere => false,
  })

  optparse_object.separator "Required arguments:\n\n"
  optparse_object.on("-c", "--contigs FASTA_FILE", String, "A fasta file of contigs to be worked with [required]") do |arg|
    options[:contigs_file] = arg
  end
  # optparse has already converted the values below to Integer, so no
  # further conversion is needed before the range checks.
  optparse_object.on("--min-distance-from-contig-ends DISTANCE", Integer, "Primers must be at least this far from the ends of the contigs [required]") do |arg|
    options[:min_distance] = arg
    raise ArgumentError, "--min-distance-from-contig-ends has to be greater than/equal to 0, found #{arg}" unless options[:min_distance] >= 0
  end
  optparse_object.on("--max-distance-from-contig-ends DISTANCE", Integer, "Primers must be at most this far from the ends of the contigs [required]") do |arg|
    options[:max_distance] = arg
    raise ArgumentError, "--max-distance-from-contig-ends has to be greater than 0, found #{arg}" unless options[:max_distance] > 0
  end

  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on("--optimum-melting-temperature TEMPERATURE", Integer, "Primers aim for this melting temperature [default: default in primer3 (currently 60C)]") do |arg|
    options[:melting_temperature_optimum] = arg
    # The message previously started with " has to be ..." and never named the flag.
    raise ArgumentError, "--optimum-melting-temperature has to be greater than 0, found #{arg}" unless options[:melting_temperature_optimum] > 0
  end
  optparse_object.on("--contig-universe FASTA_FILE", String, "All contigs in the mixture [default: unspecified (don't test this)]") do |arg|
    options[:contig_universe] = arg
  end
  optparse_object.on("--persevere", "Don't automatically exit when a primer pair doesn't validate, though continue warning on ERROR log level [default: #{options[:persevere] }]") do
    options[:persevere] = true
  end

  optparse_object.on("--primer3-options OPTION_LIST", "Give extra instructions to Primer3 [default <none>]. Acceptable values can be found in the primer3 manual e.g. 'PRIMER_MAX_POLY_X=4;PRIMER_MAX_SIZE=22' will specify those 2 parameters to primer3. Argument names are auto-capitalised so 'primer_max_poly_X=4;primer_max_size=22'is equivalent.") do |arg|
    # Supplying this flag replaces the built-in extra options rather than
    # adding to them.
    options[:extra_global_primer3_options] = {}
    arg.split(';').each do |a2|
      splits = a2.split('=')
      unless splits.length == 2
        raise "Unexpected format of the --primer3-options flag, specifically couldn't parse this part: '#{a2}'"
      end
      options[:extra_global_primer3_options][splits[0].upcase] = splits[1]
    end
  end
end
|
|
69
|
+
|
|
70
|
+
# Check the parsed command line. Returns an error message String when there
# are leftover positional arguments or when one of the required options is
# missing from +options+; returns nil when everything needed is present.
def validate_options(options, argv)
  return "Dangling argument(s) found e.g. #{argv[0] }" unless argv.length == 0

  # Report only the first missing option, in this fixed order.
  required = [:contigs_file, :min_distance, :max_distance]
  missing = required.find { |sym| options[sym].nil? }
  return "No option found to specify #{missing}." unless missing.nil?

  nil
end
|
|
86
|
+
|
|
87
|
+
# Design one primer for each end of every contig in options[:contigs_file],
# validate the chosen set, and print it to stdout as a tab-separated table
# (Contig, Side, Index, Primer).
#
# Steps, in order:
# 1. Read the contigs and check options[:min_distance] against the shortest one.
# 2. Ask primer3 for up to 5 candidate primers per contig end.
# 3. Pick one candidate per end such that primer3 reports every pair as
#    compatible.
# 4. Re-check all chosen pairs with primer3, then with iPCRess against the
#    contigs as given (no product expected), against each pair of contigs
#    joined by 100 Ns (exactly one product expected) and, when
#    options[:contig_universe] is set, against that file (no product expected).
#
# Failures are logged and terminate the process with `exit 1`; where the
# code checks options[:persevere], a true value lets the run continue.
# +argv+ is not used.
def run(options, argv)
  Bio::Log::CLI.configure('bio-primer3')

  # Read the contigs in
  contigs = []
  Bio::FlatFile.foreach(options[:contigs_file]) do |entry|
    contigs.push entry
  end
  log.info "Read in #{contigs.length} contigs from #{options[:contigs_file] }"

  # Without this guard an empty input fails below with NoMethodError on nil.
  if contigs.empty?
    log.error "No contigs were found in #{options[:contigs_file] }, so no primers can be designed"
    exit 1
  end

  min_length = contigs.collect{|contig| contig.seq.length}.min
  log.info "Minimum contig length #{min_length}"
  unless options[:min_distance] < min_length / 2
    log.error "Minimum primer distance from the ends of the contigs is too large, as the smallest contig is #{min_length} long, and the min distance must be less than half of this length"
    exit 1
  end

  # Global primer3 settings shared by primer picking and compatibility checks.
  # User-supplied extras are merged last, so they override the derived values.
  extra_primer3_options = {}
  unless options[:min_primer_size].nil?
    extra_primer3_options['PRIMER_MIN_SIZE'] = options[:min_primer_size]
  end
  unless options[:melting_temperature_optimum].nil?
    extra_primer3_options.merge!({
      'PRIMER_OPT_TM' => options[:melting_temperature_optimum],
      'PRIMER_MIN_TM' => options[:melting_temperature_optimum]-options[:melting_temperature_tolerance],
      'PRIMER_MAX_TM' => options[:melting_temperature_optimum]+options[:melting_temperature_tolerance],
    })
  end
  unless options[:extra_global_primer3_options].nil?
    extra_primer3_options.merge! options[:extra_global_primer3_options]
  end
  if log.debug?
    # Get "debug-mode" from primer3 as well.
    extra_primer3_options.merge! 'PRIMER_EXPLAIN_FLAG' => '1'
  end

  # Predict a bunch of different primers for each end of each contig. Predict the start and end of each contig as the pair to pass to primer3
  primer3_results = []
  contigs.each do |contig|
    start_chunk = contig.seq[options[:min_distance]..options[:max_distance]]
    end_chunk = contig.seq[(contig.length-options[:max_distance]) .. (contig.length-options[:min_distance])].downcase
    log.debug "Start chunk length #{start_chunk.length}, end chunk length #{end_chunk.length}"

    # Join them together so that a forward primer will point off the end of the contig,
    # and a reverse primer will point off the start of the contig
    num_ns = 100
    joined = end_chunk+'N'*num_ns+start_chunk

    # Predict with primer3
    result = Bio::Primer3.run({
      'SEQUENCE_TEMPLATE' => joined,
      'PRIMER_TASK' => 'pick_sequencing_primers',
      'SEQUENCE_TARGET' => "#{end_chunk.length},#{num_ns}",
      'PRIMER_NUM_RETURN'=>'5',
      'PRIMER_PRODUCT_SIZE_RANGE'=>"#{num_ns}-#{joined.length}",
      }.merge(extra_primer3_options)
    )
    log.debug "primer3 returned the following result: #{result.output_hash.inspect}"

    if result.yeh?
      # Push each of the reported primers. In the joined template the RIGHT
      # primers lie in the start chunk and the LEFT primers in the end chunk,
      # hence the side each list is filed under below.
      fwds = []
      reverses = []
      (0...result['PRIMER_LEFT_NUM_RETURNED'].to_i).each do |pair_number|
        fwds.push result["PRIMER_RIGHT_#{pair_number}_SEQUENCE"]
        reverses.push result["PRIMER_LEFT_#{pair_number}_SEQUENCE"]
      end
      contig_name = contig.definition

      f = PrimerList.new
      f.contig_side = PrimerList::START_OF_CONTIG
      f.primers = fwds
      f.contig_name = contig_name
      primer3_results.push f

      r = PrimerList.new
      r.contig_side = PrimerList::END_OF_CONTIG
      r.primers = reverses
      r.contig_name = contig_name
      primer3_results.push r
    else
      log.error "For failing primer #{contig.definition}, primer3 result was: #{result.output_hash.inspect}"
      log.error "No primers found for contig #{contig.definition}, giving up"
      exit 1 unless options[:persevere]
    end
  end
  log.info "Finished getting first round of primers, now have #{primer3_results.length} sets of primers e.g. #{primer3_results[0].inspect}"

  if log.debug?
    log.debug "Primer sets to be validated:"
    primer3_results.each do |res|
      log.debug res.inspect
    end
  end

  # Greedily try to find a set of primers such that one primer is picked from each location,
  # and this total set of primers doesn't conflict in any way.
  #
  # current_path[i] is the index of the primer currently chosen from
  # primer_sets[i]; next_index_to_change is the set being decided.
  #
  # NOTE(review): because an incompatible pair exits the process below, the
  # backtracking branches (and the statements after that exit) are never
  # exercised. They are kept unchanged so backtracking can be enabled later,
  # but they have not been validated.
  failed = false
  current_path = [0]
  next_index_to_change = 0
  primer_sets = primer3_results.collect{|s| s.primers}

  while !failed && next_index_to_change < primer3_results.length
    if next_index_to_change == 0
      # guaranteed to be ok since there is only 1 primer
      current_path[0] = 0

      # If there is no more, then we've failed.
      if current_path[0] == primer_sets.length
        failed = true
      else
        next_index_to_change += 1
      end

    else
      # Change the next possible index
      current_path[next_index_to_change] ||= -1
      if current_path[next_index_to_change] >= primer_sets[next_index_to_change].length
        # No more possibilities are available from this primer set, so have to backtrack
        next_index_to_change -= 1
      else
        current_path[next_index_to_change] += 1

        # Test whether this new primer conflicts with any of the old primers
        primer_to_test = primer_sets[next_index_to_change][current_path[next_index_to_change]]
        previous_primer_list = (0...next_index_to_change).collect do |i|
          primer_sets[i][current_path[i]]
        end
        failed_against_prev = false
        previous_primer_list.each do |prev|
          log.debug "Testing #{primer_to_test.inspect} against #{prev.inspect}"
          if Bio::Primer3.test_primer_compatibility(primer_to_test, prev, extra_primer3_options) == false
            log.error "Found an incompatible pair of primers, unfortunately"
            log.error "This route through the code has never been tested, so you'll have to take a look at the code to ensure there is no bugs. Or else run this program with different parameters. Exiting."
            exit 1
            # Not reached while the exit above is in place.
            failed_against_prev = true
            break
          else
            log.debug "Compatible, cool"
          end
        end

        if failed_against_prev
          log.debug "At least one primer was incompatible, trying again"
          # Do nothing, try to change the same index again
        else
          log.debug "All compatible, cool. Now moving to the next index"
          current_path[next_index_to_change+1] = nil unless next_index_to_change+1 >= primer_sets.length
          next_index_to_change += 1
        end
      end
    end
  end

  if failed
    log.error "Sorry, no sets of primers satisfy the criteria"
    exit 1
  end

  # First just make sure that everything is ok here
  log.info "Double checking to make sure there is no incompatibilities between primer pairs"
  num_compared = 0
  (0...primer_sets.length).to_a.combination(2) do |array|
    primer1 = primer_sets[array[0]][current_path[array[0]]]
    primer2 = primer_sets[array[1]][current_path[array[1]]]
    result, = Bio::Primer3.test_primer_compatibility(primer1, primer2, extra_primer3_options, :return_result => true)
    num_compared += 1

    if result == false
      log.error "Programming error!! There was supposed to be an OK path, but that path wasn't OK in the validation (#{primer1} and #{primer2}) were the problem"
      exit 1 unless options[:persevere]
    end
  end
  log.info "Validated #{num_compared} different pairs of primers, they don't seem to conflict with each other at all, according to primer3's check primers thing"

  # Check using in-silico PCR that all is ok
  # First, running ipcress on the contigs not joined together shouldn't yield any products
  log.debug "in-silico PCR: making sure there are no spurious primer pairings within the contigs themselves"
  ipcress_options = {:min_distance => 1, :max_distance => 10000, :mismatches => 0}
  num_compared = 0
  (0...primer_sets.length).to_a.combination(2) do |array|
    primer1 = primer_sets[array[0]][current_path[array[0]]]
    primer2 = primer_sets[array[1]][current_path[array[1]]]

    primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
    result = Bio::Ipcress.run primer_set, options[:contigs_file], ipcress_options
    num_compared += 1
    log.debug "Ipcress output:"
    log.debug result.inspect

    unless result.length == 0
      set1 = primer3_results[array[0]]
      set2 = primer3_results[array[1]]

      log.error "Unanticipated products generated from primer pair #{set1.contig_name}/#{set1.contig_side}/#{primer1} and #{set2.contig_name}/#{set2.contig_side}/#{primer2}. Sorry, fail."
      exit 1 unless options[:persevere]
    end
  end
  log.info "Validated #{num_compared} different pairs of primers so that unanticipated products are not formed according to iPCRess, and there doesn't seem to be any of those. Yey."

  # For each pair of primers, join together the corresponding contigs and validate that a sequencing product would eventuate
  num_compared = 0
  contigs_hash = {}
  contigs.each do |contig|
    contigs_hash[contig.definition] = contig.seq
  end
  log.debug "in-silico PCR: making sure expected products are generated..."
  (0...primer_sets.length).to_a.combination(2) do |array|
    set1 = primer3_results[array[0]]
    set2 = primer3_results[array[1]]

    primer1 = set1.primers[current_path[array[0]]]
    primer2 = set2.primers[current_path[array[1]]]

    log.debug "Testing #{primer1} and #{primer2}: indicies #{array[0] } and #{array[1] }, contigs #{set1.contig_name} #{set1.contig_side } and #{set2.contig_name} #{set2.contig_side}"

    primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
    Tempfile.open('ipcress') do |tempfile|
      # Have to correctly orient the template sequences
      s = PrimerList::START_OF_CONTIG
      e = PrimerList::END_OF_CONTIG
      f1 = contigs_hash[set1.contig_name]
      r1 = ' '+Bio::Sequence::NA.new(contigs_hash[set1.contig_name]).reverse_complement.to_s.upcase
      f2 = contigs_hash[set2.contig_name]
      r2 = ' '+Bio::Sequence::NA.new(contigs_hash[set2.contig_name]).reverse_complement.to_s.upcase

      seqs_ordered = case [set1.contig_side, set2.contig_side]
      when [s,s]
        log.debug('case s s'); [r1,f2]
      when [s,e]
        log.debug('case s e'); [r1,r2]
      when [e,s]
        log.debug('case e s'); [f1,f2]
      when [e,e]
        log.debug('case e e'); [f1,r2]
      end

      # One FASTA record: first template, a 100 N spacer, second template.
      tempfile.puts '>test'
      tempfile.puts seqs_ordered[0]
      tempfile.puts 'N'*100
      tempfile.puts seqs_ordered[1]
      # Closed so the content is on disk before external tools read it.
      tempfile.close

      if log.debug?
        log.debug "Testing with iPCRess #{primer1} and #{primer2} from #{set1.contig_name} and #{set2.contig_name}, respectively"
        log.debug "Input fasta file"
        # Read the file directly; the previous shell redirect logged an
        # empty string and left a stray copy at /tmp/ta.
        log.debug File.read(tempfile.path)
        log.debug "Fasta file input stats:"
        # NOTE(review): needs an external `seqstat` executable on the PATH.
        log.debug `seqstat #{tempfile.path}`
      end

      results = Bio::Ipcress.run primer_set, tempfile.path, ipcress_options
      # Counted once per pair; it used to be incremented again after this
      # block, doubling the total reported below.
      num_compared += 1
      unless results.length == 1
        if results.length == 0
          log.error "Anticipated products not generated in the hypothetical scenario of #{primer1} and #{primer2}, from #{set1.contig_name} and #{set2.contig_name}, respectively"
          exit 1 unless options[:persevere]
        else
          log.error "Too many PCR products generated in the hypothetical scenario of #{primer1} and #{primer2}, from #{set1.contig_name} and #{set2.contig_name}, respectively"
          log.error "Specifically, these PCR products were generated:"
          results.each do |res|
            log.error res.inspect
          end
          exit 1 unless options[:persevere]
        end
      end
    end
  end
  log.info "Validated #{num_compared} different pairs of primers so that the anticipated products are formed according to iPCRess, and all seem to be as expected. Yey."

  # If a universe of possible contigs was given, do any primer pairs match up?
  num_compared = 0
  if options[:contig_universe]
    log.info "in-silico PCR: testing the contig universe. This could probably be sped up by only making a single call to ipcress, but oh well."
    (0...primer_sets.length).to_a.combination(2) do |array|
      primer1 = primer_sets[array[0]][current_path[array[0]]]
      primer2 = primer_sets[array[1]][current_path[array[1]]]

      primer_set = Bio::Ipcress::PrimerSet.new primer1, primer2
      results = Bio::Ipcress.run primer_set, options[:contig_universe], ipcress_options
      num_compared += 1

      if results.length > 0
        log.warn "Found #{results.length} matches between #{primer1} and #{primer2} in the contig universe, expected none."
        exit 1 unless options[:persevere]
      end
      # Progress dots on stdout, one per pair tested.
      print '.' if log.info?
    end
    puts if log.info?
  else
    log.info "Not checking to see if primers match any other contigs not targeted are picked up by these primers, because no universe was specified."
  end
  log.debug "Tested #{num_compared} pairs of primers to see if anything else was hit, warnings above if any did so"

  # Two primer lists are expected per contig (start and end).
  if current_path.length == contigs.length*2
    log.info "Hoorah! Found an ok set of primers"
  else
    log.error "Unable to find a complete set of primers with the constraints given to primer3. Printing the primers that were found anyway.."
  end
  puts %w(Contig Side Index Primer).join("\t")
  current_path.each_with_index do |primer_index, primer_set_index|
    break if primer_set_index >= primer3_results.length

    end_info = primer3_results[primer_set_index]
    primer = end_info.primers[primer_index]
    puts [
      end_info.contig_name,
      end_info.contig_side,
      primer_index,
      primer,
    ].join("\t")
  end
end
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
# Plain record holding the candidate primers for one end of one contig.
class PrimerList
  # Values stored in #contig_side to say which end the primers belong to.
  START_OF_CONTIG = 'start_of_contig'
  END_OF_CONTIG = 'end_of_contig'

  # Name of the contig the primers were designed for.
  attr_accessor :contig_name
  # One of START_OF_CONTIG or END_OF_CONTIG.
  attr_accessor :contig_side
  # Candidate primers for this contig end.
  attr_accessor :primers
end
|
|
425
|
+
end
|