bio-velvet_underground 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitmodules +3 -0
- data/.travis.yml +13 -0
- data/Gemfile +19 -0
- data/LICENSE.txt +20 -0
- data/README.md +53 -0
- data/Rakefile +51 -0
- data/VERSION +1 -0
- data/ext/bioruby.patch +60 -0
- data/ext/mkrf_conf.rb +50 -0
- data/ext/src/Makefile +125 -0
- data/ext/src/src/allocArray.c +305 -0
- data/ext/src/src/allocArray.h +86 -0
- data/ext/src/src/autoOpen.c +107 -0
- data/ext/src/src/autoOpen.h +18 -0
- data/ext/src/src/binarySequences.c +813 -0
- data/ext/src/src/binarySequences.h +125 -0
- data/ext/src/src/concatenatedGraph.c +233 -0
- data/ext/src/src/concatenatedGraph.h +30 -0
- data/ext/src/src/concatenatedPreGraph.c +262 -0
- data/ext/src/src/concatenatedPreGraph.h +29 -0
- data/ext/src/src/correctedGraph.c +2642 -0
- data/ext/src/src/correctedGraph.h +32 -0
- data/ext/src/src/dfib.c +509 -0
- data/ext/src/src/dfib.h +69 -0
- data/ext/src/src/dfibHeap.c +89 -0
- data/ext/src/src/dfibHeap.h +39 -0
- data/ext/src/src/dfibpriv.h +105 -0
- data/ext/src/src/fib.c +628 -0
- data/ext/src/src/fib.h +78 -0
- data/ext/src/src/fibHeap.c +79 -0
- data/ext/src/src/fibHeap.h +41 -0
- data/ext/src/src/fibpriv.h +110 -0
- data/ext/src/src/globals.h +153 -0
- data/ext/src/src/graph.c +3983 -0
- data/ext/src/src/graph.h +233 -0
- data/ext/src/src/graphReConstruction.c +1472 -0
- data/ext/src/src/graphReConstruction.h +30 -0
- data/ext/src/src/graphStats.c +2167 -0
- data/ext/src/src/graphStats.h +72 -0
- data/ext/src/src/kmer.c +652 -0
- data/ext/src/src/kmer.h +73 -0
- data/ext/src/src/kmerOccurenceTable.c +236 -0
- data/ext/src/src/kmerOccurenceTable.h +44 -0
- data/ext/src/src/kseq.h +223 -0
- data/ext/src/src/locallyCorrectedGraph.c +557 -0
- data/ext/src/src/locallyCorrectedGraph.h +40 -0
- data/ext/src/src/passageMarker.c +677 -0
- data/ext/src/src/passageMarker.h +137 -0
- data/ext/src/src/preGraph.c +1717 -0
- data/ext/src/src/preGraph.h +106 -0
- data/ext/src/src/preGraphConstruction.c +990 -0
- data/ext/src/src/preGraphConstruction.h +26 -0
- data/ext/src/src/readCoherentGraph.c +557 -0
- data/ext/src/src/readCoherentGraph.h +30 -0
- data/ext/src/src/readSet.c +1734 -0
- data/ext/src/src/readSet.h +67 -0
- data/ext/src/src/recycleBin.c +199 -0
- data/ext/src/src/recycleBin.h +58 -0
- data/ext/src/src/roadMap.c +342 -0
- data/ext/src/src/roadMap.h +65 -0
- data/ext/src/src/run.c +318 -0
- data/ext/src/src/run.h +52 -0
- data/ext/src/src/run2.c +712 -0
- data/ext/src/src/scaffold.c +1876 -0
- data/ext/src/src/scaffold.h +64 -0
- data/ext/src/src/shortReadPairs.c +1243 -0
- data/ext/src/src/shortReadPairs.h +32 -0
- data/ext/src/src/splay.c +259 -0
- data/ext/src/src/splay.h +43 -0
- data/ext/src/src/splayTable.c +1315 -0
- data/ext/src/src/splayTable.h +31 -0
- data/ext/src/src/tightString.c +362 -0
- data/ext/src/src/tightString.h +82 -0
- data/ext/src/src/utility.c +199 -0
- data/ext/src/src/utility.h +98 -0
- data/ext/src/third-party/zlib-1.2.3/ChangeLog +855 -0
- data/ext/src/third-party/zlib-1.2.3/FAQ +339 -0
- data/ext/src/third-party/zlib-1.2.3/INDEX +51 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile +154 -0
- data/ext/src/third-party/zlib-1.2.3/Makefile.in +154 -0
- data/ext/src/third-party/zlib-1.2.3/README +125 -0
- data/ext/src/third-party/zlib-1.2.3/adler32.c +149 -0
- data/ext/src/third-party/zlib-1.2.3/algorithm.txt +209 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.pup +66 -0
- data/ext/src/third-party/zlib-1.2.3/amiga/Makefile.sas +65 -0
- data/ext/src/third-party/zlib-1.2.3/as400/bndsrc +132 -0
- data/ext/src/third-party/zlib-1.2.3/as400/compile.clp +123 -0
- data/ext/src/third-party/zlib-1.2.3/as400/readme.txt +111 -0
- data/ext/src/third-party/zlib-1.2.3/as400/zlib.inc +331 -0
- data/ext/src/third-party/zlib-1.2.3/compress.c +79 -0
- data/ext/src/third-party/zlib-1.2.3/configure +459 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/README.contrib +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/buffer_demo.adb +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/mtest.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/read.adb +156 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/readme.txt +65 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/test.adb +463 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.adb +225 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-streams.ads +114 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.adb +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib-thin.ads +450 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.adb +701 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.ads +328 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/ada/zlib.gpr +20 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/README.586 +43 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm586/match.S +364 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/README.686 +34 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/asm686/match.S +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/README +4 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.c +444 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/blast.h +71 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.pk +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/blast/test.txt +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLib.pas +557 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/ZLibConst.pas +11 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/delphi/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.build +33 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.chm +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib.sln +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/AssemblyInfo.cs +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/ChecksumImpl.cs +202 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CircularBuffer.cs +83 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/CodecBase.cs +198 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Deflater.cs +106 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.cs +288 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/DotZLib.csproj +141 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/GZipStream.cs +301 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/Inflater.cs +105 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/DotZLib/UnitTests.cs +274 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/LICENSE_1_0.txt +23 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/dotzlib/readme.txt +58 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/README +1 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.c +608 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/infback9.h +37 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inffix9.h +107 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inflate9.h +47 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.c +323 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/infback9/inftree9.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffas86.c +1157 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/inflate86/inffast.S +1368 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/test.cpp +24 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.cpp +329 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream/zfstream.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream.h +307 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream2/zstream_test.cpp +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/README +35 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/TODO +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/test.cc +50 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.cc +479 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/iostream3/zfstream.h +466 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masm686/match.asm +413 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/bld_ml64.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.asm +513 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/gvmat64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffas8664.c +186 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.asm +392 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/inffasx64.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx64/readme.txt +28 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/bld_ml32.bat +2 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.asm +972 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/gvmat32c.c +62 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.asm +1083 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/inffas32.obj +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/mkasm.bat +3 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/masmx86/readme.txt +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ChangeLogUnzip +67 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/Makefile +25 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/crypt.h +132 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.c +177 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/ioapi.h +75 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.c +270 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/iowin32.h +21 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/miniunz.c +585 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/minizip.c +420 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.c +281 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/mztools.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.c +1598 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/unzip.h +354 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/minizip/zip.h +235 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/example.pas +599 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/readme.txt +76 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibd32.mak +93 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/pascal/zlibpas.pas +236 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/Makefile +8 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/README +63 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.c +837 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/puff.h +31 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/puff/zeros.raw +0 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.c +275 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/testzlib/testzlib.txt +10 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile +14 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/Makefile.msc +17 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/untgz/untgz.c +674 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/readme.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/miniunz.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/minizip.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/testzlib.vcproj +126 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibstat.vcproj +246 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.sln +78 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc7/zlibvc.vcproj +445 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/miniunz.vcproj +566 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/minizip.vcproj +563 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlib.vcproj +948 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/testzlibdll.vcproj +567 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlib.rc +32 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibstat.vcproj +870 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.def +92 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.sln +144 -0
- data/ext/src/third-party/zlib-1.2.3/contrib/vstudio/vc8/zlibvc.vcproj +1219 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.c +423 -0
- data/ext/src/third-party/zlib-1.2.3/crc32.h +441 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.c +1736 -0
- data/ext/src/third-party/zlib-1.2.3/deflate.h +331 -0
- data/ext/src/third-party/zlib-1.2.3/example.c +565 -0
- data/ext/src/third-party/zlib-1.2.3/examples/README.examples +42 -0
- data/ext/src/third-party/zlib-1.2.3/examples/fitblk.c +233 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gun.c +693 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzappend.c +500 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzjoin.c +448 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.c +413 -0
- data/ext/src/third-party/zlib-1.2.3/examples/gzlog.h +58 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zlib_how.html +523 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zpipe.c +191 -0
- data/ext/src/third-party/zlib-1.2.3/examples/zran.c +404 -0
- data/ext/src/third-party/zlib-1.2.3/gzio.c +1026 -0
- data/ext/src/third-party/zlib-1.2.3/infback.c +623 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/inffast.h +11 -0
- data/ext/src/third-party/zlib-1.2.3/inffixed.h +94 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.c +1368 -0
- data/ext/src/third-party/zlib-1.2.3/inflate.h +115 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.c +329 -0
- data/ext/src/third-party/zlib-1.2.3/inftrees.h +55 -0
- data/ext/src/third-party/zlib-1.2.3/make_vms.com +461 -0
- data/ext/src/third-party/zlib-1.2.3/minigzip.c +322 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.bor +109 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.dj2 +104 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.msc +106 -0
- data/ext/src/third-party/zlib-1.2.3/msdos/Makefile.tc +94 -0
- data/ext/src/third-party/zlib-1.2.3/old/Makefile.riscos +151 -0
- data/ext/src/third-party/zlib-1.2.3/old/README +3 -0
- data/ext/src/third-party/zlib-1.2.3/old/descrip.mms +48 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/Makefile.os2 +136 -0
- data/ext/src/third-party/zlib-1.2.3/old/os2/zlib.def +51 -0
- data/ext/src/third-party/zlib-1.2.3/old/visual-basic.txt +160 -0
- data/ext/src/third-party/zlib-1.2.3/old/zlib.html +971 -0
- data/ext/src/third-party/zlib-1.2.3/projects/README.projects +41 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/README.txt +73 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/example.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/minigzip.dsp +278 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsp +609 -0
- data/ext/src/third-party/zlib-1.2.3/projects/visualc6/zlib.dsw +59 -0
- data/ext/src/third-party/zlib-1.2.3/qnx/package.qpg +141 -0
- data/ext/src/third-party/zlib-1.2.3/trees.c +1219 -0
- data/ext/src/third-party/zlib-1.2.3/trees.h +128 -0
- data/ext/src/third-party/zlib-1.2.3/uncompr.c +61 -0
- data/ext/src/third-party/zlib-1.2.3/win32/DLL_FAQ.txt +397 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.bor +107 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.emx +69 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.gcc +141 -0
- data/ext/src/third-party/zlib-1.2.3/win32/Makefile.msc +126 -0
- data/ext/src/third-party/zlib-1.2.3/win32/VisualC.txt +3 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib.def +60 -0
- data/ext/src/third-party/zlib-1.2.3/win32/zlib1.rc +39 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zconf.in.h +332 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.3 +159 -0
- data/ext/src/third-party/zlib-1.2.3/zlib.h +1357 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.c +318 -0
- data/ext/src/third-party/zlib-1.2.3/zutil.h +269 -0
- data/lib/bio-velvet_underground.rb +12 -0
- data/lib/bio-velvet_underground/external/VERSION +1 -0
- data/lib/bio-velvet_underground/velvet_underground.rb +72 -0
- data/spec/binary_sequence_store_spec.rb +27 -0
- data/spec/data/1/CnyUnifiedSeq +0 -0
- data/spec/spec_helper.rb +31 -0
- metadata +456 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
3
|
+
|
4
|
+
This file is part of Velvet.
|
5
|
+
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with Velvet; if not, write to the Free Software
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
|
20
|
+
*/
|
21
|
+
#ifndef _ROADMAP_H_
|
22
|
+
#define _ROADMAP_H_
|
23
|
+
|
24
|
+
#include <stdio.h>
|
25
|
+
|
26
|
+
/*
 * Aggregate behind the RoadMapArray handle used by the functions declared
 * in this header (getRoadMapInArray, importRoadMapArray,
 * destroyRoadMapArray).
 *
 * NOTE(review): the field meanings below are inferred from the names only
 * and should be confirmed against roadMap.c:
 *   array          - presumably the RoadMap entries, indexed by IDnum
 *   annotations    - presumably the Annotation storage the road maps refer to
 *   length         - presumably the number of entries in `array`
 *   WORDLENGTH     - presumably the k-mer (hash) length of the run
 *   double_strand  - presumably whether both strands were hashed
 *   referenceCount - presumably the number of reference sequences
 */
struct roadMapArray_st {
|
27
|
+
RoadMap *array;
|
28
|
+
Annotation *annotations;
|
29
|
+
IDnum length;
|
30
|
+
int WORDLENGTH;
|
31
|
+
boolean double_strand;
|
32
|
+
IDnum referenceCount;
|
33
|
+
};
|
34
|
+
|
35
|
+
////////////////////////////////////////////////////////////////////
|
36
|
+
// Annotation stuff
|
37
|
+
////////////////////////////////////////////////////////////////////
|
38
|
+
IDnum getAnnotSequenceID(Annotation * annot);
|
39
|
+
Coordinate getFinish(Annotation * annot);
|
40
|
+
Coordinate getStart(Annotation * annot);
|
41
|
+
Coordinate getPosition(Annotation * annot);
|
42
|
+
Coordinate getAnnotationLength(Annotation * annot);
|
43
|
+
void incrementAnnotationCoordinates(Annotation * annot);
|
44
|
+
|
45
|
+
void setStartID(Annotation * annot, IDnum nodeID);
|
46
|
+
IDnum getStartID(Annotation * annot);
|
47
|
+
void setFinishID(Annotation * annot, IDnum nodeID);
|
48
|
+
IDnum getFinishID(Annotation * annot);
|
49
|
+
|
50
|
+
Annotation *getNextAnnotation(Annotation * annot);
|
51
|
+
Annotation *getAnnotationInArray(Annotation * annot, Coordinate index);
|
52
|
+
|
53
|
+
////////////////////////////////////////////////////////////////////
|
54
|
+
// RoadMap stuff
|
55
|
+
////////////////////////////////////////////////////////////////////
|
56
|
+
|
57
|
+
IDnum getAnnotationCount(RoadMap * rdmap);
|
58
|
+
|
59
|
+
RoadMap *getRoadMapInArray(RoadMapArray * array, IDnum index);
|
60
|
+
|
61
|
+
// Same thing but for the RoadMap file generated by the hash
|
62
|
+
RoadMapArray *importRoadMapArray(char *filename);
|
63
|
+
RoadMapArray *importReferenceRoadMapArray(char * filename);
|
64
|
+
void destroyRoadMapArray(RoadMapArray * rdmap);
|
65
|
+
#endif
|
data/ext/src/src/run.c
ADDED
@@ -0,0 +1,318 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
3
|
+
|
4
|
+
This file is part of Velvet.
|
5
|
+
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with Velvet; if not, write to the Free Software
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
|
20
|
+
*/
|
21
|
+
#include <stdlib.h>
|
22
|
+
#include <string.h>
|
23
|
+
#include <sys/stat.h>
|
24
|
+
#if defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
|
25
|
+
#include <uce-dirent.h>
|
26
|
+
#define Arc v_Arc
|
27
|
+
#else
|
28
|
+
#include <dirent.h>
|
29
|
+
#endif
|
30
|
+
|
31
|
+
#include "run.h"
|
32
|
+
|
33
|
+
static void printUsage()
|
34
|
+
{
|
35
|
+
puts("Usage:");
|
36
|
+
puts("./velveth directory hash_length {[-file_format][-read_type][-separate|-interleaved] filename1 [filename2 ...]} {...} [options]");
|
37
|
+
puts("");
|
38
|
+
puts("\tdirectory\t: directory name for output files");
|
39
|
+
printf("\thash_length\t: EITHER an odd integer (if even, it will be decremented) <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
40
|
+
printf("\t\t\t: OR: m,M,s where m and M are odd integers (if not, they will be decremented) with m < M <= %i (if above, will be reduced)\n", MAXKMERLENGTH);
|
41
|
+
puts("\t\t\t\tand s is a step (even number). Velvet will then hash from k=m to k=M with a step of s");
|
42
|
+
puts("\tfilename\t: path to sequence file or - for standard input");
|
43
|
+
puts("");
|
44
|
+
puts("File format options:");
|
45
|
+
puts("\t-fasta\t-fastq\t-raw\t-fasta.gz\t-fastq.gz\t-raw.gz\t-sam\t-bam\t-fmtAuto");
|
46
|
+
puts("\t(Note: -fmtAuto will detect fasta or fastq, and will try the following programs for decompression : gunzip, pbunzip2, bunzip2");
|
47
|
+
puts("");
|
48
|
+
puts("File layout options for paired reads (only for fasta and fastq formats):");
|
49
|
+
puts("\t-interleaved\t: File contains paired reads interleaved in the one file (default)");
|
50
|
+
puts("\t-separate\t: Read 2 separate files for paired reads");
|
51
|
+
puts("");
|
52
|
+
puts("Read type options:");
|
53
|
+
puts("\t-short\t-shortPaired");
|
54
|
+
#if CATEGORIES <= 5
|
55
|
+
Category cat;
|
56
|
+
for (cat = 2; cat <= CATEGORIES; cat++)
|
57
|
+
printf("\t-short%i\t-shortPaired%i\n", cat, cat);
|
58
|
+
#else
|
59
|
+
puts("\t...");
|
60
|
+
printf("\t-short%i\t-shortPaired%i\n", CATEGORIES - 1, CATEGORIES - 1);
|
61
|
+
printf("\t-short%i\t-shortPaired%i\n", CATEGORIES, CATEGORIES);
|
62
|
+
#endif
|
63
|
+
puts("\t-long\t-longPaired");
|
64
|
+
puts("\t-reference");
|
65
|
+
puts("");
|
66
|
+
puts("Options:");
|
67
|
+
puts("\t-strand_specific\t: for strand specific transcriptome sequencing data (default: off)");
|
68
|
+
puts("\t-reuse_Sequences\t: reuse Sequences file (or link) already in directory (no need to provide original filenames in this case (default: off)");
|
69
|
+
puts("\t-reuse_binary\t: reuse binary sequences file (or link) already in directory (no need to provide original filenames in this case (default: off)");
|
70
|
+
puts("\t-noHash\t\t\t: simply prepare Sequences file, do not hash reads or prepare Roadmaps file (default: off)");
|
71
|
+
puts("\t-create_binary \t: create binary CnyUnifiedSeq file (default: off)");
|
72
|
+
puts("");
|
73
|
+
puts("Synopsis:");
|
74
|
+
puts("");
|
75
|
+
puts("- Short single end reads:");
|
76
|
+
puts("\tvelveth Assem 29 -short -fastq s_1_sequence.txt");
|
77
|
+
puts("");
|
78
|
+
puts("- Paired-end short reads (remember to interleave paired reads):");
|
79
|
+
puts("\tvelveth Assem 31 -shortPaired -fasta interleaved.fna");
|
80
|
+
puts("");
|
81
|
+
puts("- Paired-end short reads (using separate files for the paired reads)");
|
82
|
+
puts("\tvelveth Assem 31 -shortPaired -fasta -separate left.fa right.fa");
|
83
|
+
puts("");
|
84
|
+
puts("- Two channels and some long reads:");
|
85
|
+
puts("\tvelveth Assem 43 -short -fastq unmapped.fna -longPaired -fasta SangerReads.fasta");
|
86
|
+
puts("");
|
87
|
+
puts("- Three channels:");
|
88
|
+
puts("\tvelveth Assem 35 -shortPaired -fasta pe_lib1.fasta -shortPaired2 pe_lib2.fasta -short3 se_lib1.fa");
|
89
|
+
puts("");
|
90
|
+
puts("Output:");
|
91
|
+
puts("\tdirectory/Roadmaps");
|
92
|
+
puts("\tdirectory/Sequences");
|
93
|
+
puts("\t\t[Both files are picked up by graph, so please leave them there]");
|
94
|
+
}
|
95
|
+
|
96
|
+
int main(int argc, char **argv)
|
97
|
+
{
|
98
|
+
ReadSet *allSequences = NULL;
|
99
|
+
SplayTable *splayTable;
|
100
|
+
int hashLength, hashLengthStep, hashLengthMax, h;
|
101
|
+
char *directory, *filename, *seqFilename, *baseSeqName, *buf;
|
102
|
+
char * token;
|
103
|
+
boolean double_strand = true;
|
104
|
+
boolean noHash = false;
|
105
|
+
boolean multiple_kmers = false;
|
106
|
+
char buffer[100];
|
107
|
+
DIR *dir;
|
108
|
+
|
109
|
+
setProgramName("velveth");
|
110
|
+
|
111
|
+
if (argc < 4) {
|
112
|
+
printf("velveth - simple hashing program\n");
|
113
|
+
printf("Version %i.%i.%2.2i\n", VERSION_NUMBER,
|
114
|
+
RELEASE_NUMBER, UPDATE_NUMBER);
|
115
|
+
printf("\nCopyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)\n");
|
116
|
+
printf("This is free software; see the source for copying conditions. There is NO\n");
|
117
|
+
printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\n");
|
118
|
+
printf("Compilation settings:\n");
|
119
|
+
printf("CATEGORIES = %i\n", CATEGORIES);
|
120
|
+
printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH);
|
121
|
+
#ifdef _OPENMP
|
122
|
+
puts("OPENMP");
|
123
|
+
#endif
|
124
|
+
#ifdef LONGSEQUENCES
|
125
|
+
puts("LONGSEQUENCES");
|
126
|
+
#endif
|
127
|
+
#ifdef BIGASSEMBLY
|
128
|
+
puts("BIGASSEMBLY");
|
129
|
+
#endif
|
130
|
+
#ifdef COLOR
|
131
|
+
puts("COLOR");
|
132
|
+
#endif
|
133
|
+
#ifdef DEBUG
|
134
|
+
puts("DEBUG");
|
135
|
+
#endif
|
136
|
+
printf("\n");
|
137
|
+
printUsage();
|
138
|
+
return 0;
|
139
|
+
}
|
140
|
+
|
141
|
+
strcpy(buffer, argv[2]);
|
142
|
+
token = strtok(buffer, ",");
|
143
|
+
hashLength = atoi(token);
|
144
|
+
token = strtok(NULL, ",");
|
145
|
+
if (token == NULL) {
|
146
|
+
multiple_kmers = false;
|
147
|
+
hashLengthMax = hashLength + 1;
|
148
|
+
} else {
|
149
|
+
multiple_kmers = true;
|
150
|
+
hashLengthMax = atoi(token);
|
151
|
+
}
|
152
|
+
token = strtok(NULL, ",");
|
153
|
+
if (token == NULL) {
|
154
|
+
hashLengthStep = 2;
|
155
|
+
} else {
|
156
|
+
hashLengthStep = atoi(token);
|
157
|
+
}
|
158
|
+
|
159
|
+
if (hashLength > MAXKMERLENGTH) {
|
160
|
+
velvetLog
|
161
|
+
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
162
|
+
hashLength, MAXKMERLENGTH);
|
163
|
+
hashLength = MAXKMERLENGTH;
|
164
|
+
}
|
165
|
+
if (hashLength <= 0) {
|
166
|
+
velvetLog("Invalid hash length: %s\n", argv[2]);
|
167
|
+
printUsage();
|
168
|
+
return 0;
|
169
|
+
}
|
170
|
+
if (hashLength % 2 == 0) {
|
171
|
+
velvetLog
|
172
|
+
("Velvet can't work with even length k-mers, such as %i. We'll use %i instead, if you don't mind.\n",
|
173
|
+
hashLength, hashLength - 1);
|
174
|
+
hashLength--;
|
175
|
+
}
|
176
|
+
|
177
|
+
if (multiple_kmers) {
|
178
|
+
if (hashLengthMax > MAXKMERLENGTH + 1) {
|
179
|
+
velvetLog
|
180
|
+
("Velvet can't handle k-mers as long as %i! We'll stick to %i if you don't mind.\n",
|
181
|
+
hashLengthMax, MAXKMERLENGTH + 1);
|
182
|
+
hashLengthMax = MAXKMERLENGTH + 1;
|
183
|
+
}
|
184
|
+
if (hashLengthMax <= hashLength) {
|
185
|
+
velvetLog("hashLengthMin < hashLengthMax is required %s", argv[2]);
|
186
|
+
printUsage();
|
187
|
+
return 0;
|
188
|
+
}
|
189
|
+
|
190
|
+
if (hashLengthStep <= 0) {
|
191
|
+
velvetLog("Non-positive hash length! Setting it to 2\n");
|
192
|
+
hashLengthStep = 2;
|
193
|
+
}
|
194
|
+
if (hashLengthStep % 2 == 1) {
|
195
|
+
velvetLog
|
196
|
+
("Velvet can't work with an odd length k-mer step, such as %i. We'll use %i instead, if you don't mind.\n",
|
197
|
+
hashLengthStep, hashLengthStep + 1);
|
198
|
+
hashLengthStep++;
|
199
|
+
}
|
200
|
+
}
|
201
|
+
|
202
|
+
// check if binary sequences should be used
|
203
|
+
int argIndex;
|
204
|
+
for (argIndex = 3; argIndex < argc; argIndex++)
|
205
|
+
if (strcmp(argv[argIndex], "-create_binary") == 0 || strcmp(argv[argIndex], "-reuse_binary") == 0)
|
206
|
+
setCreateBinary(true);
|
207
|
+
|
208
|
+
for (h = hashLength; h < hashLengthMax; h += hashLengthStep) {
|
209
|
+
|
210
|
+
resetWordFilter(h);
|
211
|
+
|
212
|
+
buf = mallocOrExit(2 * strlen(argv[1]) + 500, char);
|
213
|
+
|
214
|
+
if ( multiple_kmers ) {
|
215
|
+
sprintf(buf,"%s_%d",argv[1],h);
|
216
|
+
directory = mallocOrExit(strlen(buf) + 100, char);
|
217
|
+
strcpy(directory,buf);
|
218
|
+
} else
|
219
|
+
directory = argv[1];
|
220
|
+
|
221
|
+
filename = mallocOrExit(strlen(directory) + 100, char);
|
222
|
+
seqFilename = mallocOrExit(strlen(directory) + 100, char);
|
223
|
+
baseSeqName = mallocOrExit(100, char);
|
224
|
+
|
225
|
+
dir = opendir(directory);
|
226
|
+
|
227
|
+
if (dir == NULL)
|
228
|
+
mkdir(directory, 0777);
|
229
|
+
else {
|
230
|
+
sprintf(buf, "%s/PreGraph", directory);
|
231
|
+
remove(buf);
|
232
|
+
sprintf(buf, "%s/Graph", directory);
|
233
|
+
remove(buf);
|
234
|
+
sprintf(buf, "%s/Graph2", directory);
|
235
|
+
remove(buf);
|
236
|
+
sprintf(buf, "%s/Graph3", directory);
|
237
|
+
remove(buf);
|
238
|
+
sprintf(buf, "%s/Graph4", directory);
|
239
|
+
remove(buf);
|
240
|
+
}
|
241
|
+
|
242
|
+
logInstructions(argc, argv, directory);
|
243
|
+
|
244
|
+
strcpy(seqFilename, directory);
|
245
|
+
if (isCreateBinary()) {
|
246
|
+
// use the CNY unified seq writer
|
247
|
+
strcpy(baseSeqName, "/CnyUnifiedSeq");
|
248
|
+
// remove other style sequences file
|
249
|
+
sprintf(buf, "%s/Sequences", directory);
|
250
|
+
remove(buf);
|
251
|
+
} else {
|
252
|
+
strcpy(baseSeqName, "/Sequences");
|
253
|
+
// remove other style sequences file
|
254
|
+
sprintf(buf, "%s/CnyUnifiedSeq", directory);
|
255
|
+
remove(buf);
|
256
|
+
sprintf(buf, "%s/CnyUnifiedSeq.names", directory);
|
257
|
+
remove(buf);
|
258
|
+
}
|
259
|
+
strcat(seqFilename, baseSeqName);
|
260
|
+
|
261
|
+
if ( h == hashLength ) {
|
262
|
+
parseDataAndReadFiles(seqFilename, argc - 2, &(argv[2]), &double_strand, &noHash);
|
263
|
+
} else {
|
264
|
+
sprintf(buf,"rm -f %s",seqFilename);
|
265
|
+
if (system(buf)) {
|
266
|
+
velvetLog("Command failed!\n");
|
267
|
+
velvetLog("%s\n", buf);
|
268
|
+
#ifdef DEBUG
|
269
|
+
abort();
|
270
|
+
#endif
|
271
|
+
exit(1);
|
272
|
+
}
|
273
|
+
if (argv[1][0] == '/')
|
274
|
+
sprintf(buf,"ln -s %s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename);
|
275
|
+
else
|
276
|
+
sprintf(buf,"ln -s `pwd`/%s_%d%s %s",argv[1],hashLength,baseSeqName,seqFilename);
|
277
|
+
if (system(buf)) {
|
278
|
+
velvetLog("Command failed!\n");
|
279
|
+
velvetLog("%s\n", buf);
|
280
|
+
#ifdef DEBUG
|
281
|
+
abort();
|
282
|
+
#endif
|
283
|
+
exit(1);
|
284
|
+
}
|
285
|
+
}
|
286
|
+
|
287
|
+
if (noHash)
|
288
|
+
continue;
|
289
|
+
|
290
|
+
splayTable = newSplayTable(h, double_strand);
|
291
|
+
if (isCreateBinary()) {
|
292
|
+
allSequences = importCnyReadSet(seqFilename);
|
293
|
+
} else {
|
294
|
+
allSequences = importReadSet(seqFilename);
|
295
|
+
}
|
296
|
+
velvetLog("%li sequences in total.\n", (long) allSequences->readCount);
|
297
|
+
|
298
|
+
strcpy(filename, directory);
|
299
|
+
strcat(filename, "/Roadmaps");
|
300
|
+
inputSequenceArrayIntoSplayTableAndArchive(allSequences,
|
301
|
+
splayTable, filename, seqFilename);
|
302
|
+
|
303
|
+
destroySplayTable(splayTable);
|
304
|
+
if (dir)
|
305
|
+
closedir(dir);
|
306
|
+
if (directory != argv[1])
|
307
|
+
free(directory);
|
308
|
+
free(filename);
|
309
|
+
free(seqFilename);
|
310
|
+
free(baseSeqName);
|
311
|
+
free(buf);
|
312
|
+
if (allSequences) {
|
313
|
+
destroyReadSet(allSequences);
|
314
|
+
}
|
315
|
+
}
|
316
|
+
|
317
|
+
return 0;
|
318
|
+
}
|
data/ext/src/src/run.h
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
3
|
+
|
4
|
+
This file is part of Velvet.
|
5
|
+
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with Velvet; if not, write to the Free Software
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
|
20
|
+
*/
|
21
|
+
// Compilation
|
22
|
+
#include "globals.h"
|
23
|
+
|
24
|
+
// Utilities
|
25
|
+
#include "graphStats.h"
|
26
|
+
#include "utility.h"
|
27
|
+
|
28
|
+
// Datastructures
|
29
|
+
#include "kmer.h"
|
30
|
+
#include "readSet.h"
|
31
|
+
#include "tightString.h"
|
32
|
+
#include "roadMap.h"
|
33
|
+
#include "splayTable.h"
|
34
|
+
#include "graph.h"
|
35
|
+
#include "scaffold.h"
|
36
|
+
#include "binarySequences.h"
|
37
|
+
|
38
|
+
// PreGraph operations
|
39
|
+
#include "preGraph.h"
|
40
|
+
#include "preGraphConstruction.h"
|
41
|
+
#include "concatenatedPreGraph.h"
|
42
|
+
|
43
|
+
// Graph operations
|
44
|
+
#include "graph.h"
|
45
|
+
#include "graphReConstruction.h"
|
46
|
+
#include "concatenatedGraph.h"
|
47
|
+
#include "correctedGraph.h"
|
48
|
+
#include "locallyCorrectedGraph.h"
|
49
|
+
|
50
|
+
// Repeat resolution
|
51
|
+
#include "readCoherentGraph.h"
|
52
|
+
#include "shortReadPairs.h"
|
data/ext/src/src/run2.c
ADDED
@@ -0,0 +1,712 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
3
|
+
|
4
|
+
This file is part of Velvet.
|
5
|
+
|
6
|
+
Velvet is free software; you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation; either version 2 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
Velvet is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with Velvet; if not, write to the Free Software
|
18
|
+
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
|
20
|
+
*/
|
21
|
+
#include <stdlib.h>
|
22
|
+
#include <stdio.h>
|
23
|
+
#include <string.h>
|
24
|
+
#include <unistd.h>
|
25
|
+
|
26
|
+
#include "run.h"
|
27
|
+
|
28
|
+
#include "binarySequences.h"
|
29
|
+
#include "globals.h"
|
30
|
+
|
31
|
+
/*
 * Print the velvetg command-line help text to standard output.
 *
 * The help text is stored as a table with one entry per output line;
 * puts() appends the trailing newline to each entry, so an empty entry
 * produces a blank separator line. The function takes no arguments and
 * returns nothing.
 */
static void printUsage(void)
{
	/* One entry per printed line, in display order. */
	static const char *const usageLines[] = {
		"Usage:",
		"./velvetg directory [options]",
		"",
		"\tdirectory\t\t\t: working directory name",
		"",
		"Standard options:",
		"\t-cov_cutoff <floating-point|auto>\t: removal of low coverage nodes AFTER tour bus or allow the system to infer it",
		"\t\t(default: no removal)",
		"\t-ins_length <integer>\t\t: expected distance between two paired end reads (default: no read pairing)",
		"\t-read_trkg <yes|no>\t\t: tracking of short read positions in assembly (default: no tracking)",
		"\t-min_contig_lgth <integer>\t: minimum contig length exported to contigs.fa file (default: hash length * 2)",
		"\t-amos_file <yes|no>\t\t: export assembly to AMOS file (default: no export)",
		"\t-exp_cov <floating point|auto>\t: expected coverage of unique regions or allow the system to infer it",
		"\t\t(default: no long or paired-end read resolution)",
		"\t-long_cov_cutoff <floating-point>: removal of nodes with low long-read coverage AFTER tour bus",
		"\t\t(default: no removal)",
		"",
		"Advanced options:",
		"\t-ins_length* <integer>\t\t: expected distance between two paired-end reads in the respective short-read dataset (default: no read pairing)",
		"\t-ins_length_long <integer>\t: expected distance between two long paired-end reads (default: no read pairing)",
		"\t-ins_length*_sd <integer>\t: est. standard deviation of respective dataset (default: 10% of corresponding length)",
		"\t\t[replace '*' by nothing, '2' or '_long' as necessary]",
		"\t-scaffolding <yes|no>\t\t: scaffolding of contigs used paired end information (default: on)",
		"\t-max_branch_length <integer>\t: maximum length in base pair of bubble (default: 100)",
		"\t-max_divergence <floating-point>: maximum divergence rate between two branches in a bubble (default: 0.2)",
		"\t-max_gap_count <integer>\t: maximum number of gaps allowed in the alignment of the two branches of a bubble (default: 3)",
		"\t-min_pair_count <integer>\t: minimum number of paired end connections to justify the scaffolding of two long contigs (default: 5)",
		"\t-max_coverage <floating point>\t: removal of high coverage nodes AFTER tour bus (default: no removal)",
		"\t-coverage_mask <int>\t: minimum coverage required for confident regions of contigs (default: 1)",
		"\t-long_mult_cutoff <int>\t\t: minimum number of long reads required to merge contigs (default: 2)",
		"\t-unused_reads <yes|no>\t\t: export unused reads in UnusedReads.fa file (default: no)",
		"\t-alignments <yes|no>\t\t: export a summary of contig alignment to the reference sequences (default: no)",
		"\t-exportFiltered <yes|no>\t: export the long nodes which were eliminated by the coverage filters (default: no)",
		"\t-clean <yes|no>\t\t\t: remove all the intermediary files which are useless for recalculation (default : no)",
		"\t-very_clean <yes|no>\t\t: remove all the intermediary files (no recalculation possible) (default: no)",
		"\t-paired_exp_fraction <double>\t: remove all the paired end connections which less than the specified fraction of the expected count (default: 0.1)",
		"\t-shortMatePaired* <yes|no>\t: for mate-pair libraries, indicate that the library might be contaminated with paired-end reads (default no)",
		"\t-conserveLong <yes|no>\t\t: preserve sequences with long reads in them (default no)",
		"",
		"Output:",
		"\tdirectory/contigs.fa\t\t: fasta file of contigs longer than twice hash length",
		"\tdirectory/stats.txt\t\t: stats file (tab-spaced) useful for determining appropriate coverage cutoff",
		"\tdirectory/LastGraph\t\t: special formatted file with all the information on the final graph",
		"\tdirectory/velvet_asm.afg\t: (if requested) AMOS compatible assembly file"
	};
	size_t index;

	for (index = 0; index < sizeof usageLines / sizeof usageLines[0]; index++)
		puts(usageLines[index]);
}
|
78
|
+
|
79
|
+
int main(int argc, char **argv)
|
80
|
+
{
|
81
|
+
ReadSet *sequences = NULL;
|
82
|
+
RoadMapArray *rdmaps;
|
83
|
+
PreGraph *preGraph;
|
84
|
+
Graph *graph;
|
85
|
+
char *directory, *graphFilename, *connectedGraphFilename,
|
86
|
+
*preGraphFilename, *seqFilename, *roadmapFilename,
|
87
|
+
*lowCovContigsFilename, *highCovContigsFilename;
|
88
|
+
double coverageCutoff = -1;
|
89
|
+
double longCoverageCutoff = -1;
|
90
|
+
double maxCoverageCutoff = -1;
|
91
|
+
double expectedCoverage = -1;
|
92
|
+
Coordinate minContigLength = -1;
|
93
|
+
Coordinate minContigKmerLength;
|
94
|
+
boolean *dubious = NULL;
|
95
|
+
Coordinate insertLength[CATEGORIES];
|
96
|
+
Coordinate insertLengthLong = -1;
|
97
|
+
Coordinate std_dev[CATEGORIES];
|
98
|
+
Coordinate std_dev_long = -1;
|
99
|
+
short int accelerationBits = 24;
|
100
|
+
boolean readTracking = false;
|
101
|
+
boolean exportAssembly = false;
|
102
|
+
boolean unusedReads = false;
|
103
|
+
boolean estimateCoverage = false;
|
104
|
+
boolean estimateCutoff = false;
|
105
|
+
boolean exportAlignments = false;
|
106
|
+
FILE *file;
|
107
|
+
int arg_index, arg_int;
|
108
|
+
double arg_double;
|
109
|
+
char *arg;
|
110
|
+
ShortLength *sequenceLengths = NULL;
|
111
|
+
Category cat;
|
112
|
+
boolean scaffolding = true;
|
113
|
+
int pebbleRounds = 1;
|
114
|
+
long long longlong_var;
|
115
|
+
short int short_var;
|
116
|
+
boolean exportFilteredNodes = false;
|
117
|
+
int clean = 0;
|
118
|
+
boolean conserveLong = false;
|
119
|
+
boolean shadows[CATEGORIES];
|
120
|
+
int coverageMask = 1;
|
121
|
+
SequencesReader *seqReadInfo = NULL;
|
122
|
+
|
123
|
+
setProgramName("velvetg");
|
124
|
+
|
125
|
+
for (cat = 0; cat < CATEGORIES; cat++) {
|
126
|
+
insertLength[cat] = -1;
|
127
|
+
std_dev[cat] = -1;
|
128
|
+
shadows[cat] = false;
|
129
|
+
}
|
130
|
+
|
131
|
+
// Error message
|
132
|
+
if (argc == 1) {
|
133
|
+
puts("velvetg - de Bruijn graph construction, error removal and repeat resolution");
|
134
|
+
printf("Version %i.%i.%2.2i\n", VERSION_NUMBER,
|
135
|
+
RELEASE_NUMBER, UPDATE_NUMBER);
|
136
|
+
puts("Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)");
|
137
|
+
puts("This is free software; see the source for copying conditions. There is NO");
|
138
|
+
puts("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.");
|
139
|
+
puts("Compilation settings:");
|
140
|
+
printf("CATEGORIES = %i\n", CATEGORIES);
|
141
|
+
printf("MAXKMERLENGTH = %i\n", MAXKMERLENGTH);
|
142
|
+
#ifdef _OPENMP
|
143
|
+
puts("OPENMP");
|
144
|
+
#endif
|
145
|
+
#ifdef LONGSEQUENCES
|
146
|
+
puts("LONGSEQUENCES");
|
147
|
+
#endif
|
148
|
+
#ifdef BIGASSEMBLY
|
149
|
+
puts("BIGASSEMBLY");
|
150
|
+
#endif
|
151
|
+
#ifdef COLOR
|
152
|
+
puts("COLOR");
|
153
|
+
#endif
|
154
|
+
#ifdef DEBUG
|
155
|
+
puts("DEBUG");
|
156
|
+
#endif
|
157
|
+
puts("");
|
158
|
+
printUsage();
|
159
|
+
return 1;
|
160
|
+
}
|
161
|
+
|
162
|
+
if (strcmp(argv[1], "--help") == 0) {
|
163
|
+
printUsage();
|
164
|
+
return 0;
|
165
|
+
}
|
166
|
+
|
167
|
+
// Memory allocation
|
168
|
+
directory = argv[1];
|
169
|
+
graphFilename = mallocOrExit(strlen(directory) + 100, char);
|
170
|
+
connectedGraphFilename = mallocOrExit(strlen(directory) + 100, char);
|
171
|
+
preGraphFilename =
|
172
|
+
mallocOrExit(strlen(directory) + 100, char);
|
173
|
+
roadmapFilename = mallocOrExit(strlen(directory) + 100, char);
|
174
|
+
seqFilename = mallocOrExit(strlen(directory) + 100, char);
|
175
|
+
lowCovContigsFilename = mallocOrExit(strlen(directory) + 100, char);
|
176
|
+
highCovContigsFilename = mallocOrExit(strlen(directory) + 100, char);
|
177
|
+
|
178
|
+
// Argument parsing
|
179
|
+
for (arg_index = 2; arg_index < argc; arg_index++) {
|
180
|
+
arg = argv[arg_index++];
|
181
|
+
if (arg_index >= argc) {
|
182
|
+
velvetLog("Unusual number of arguments!\n");
|
183
|
+
printUsage();
|
184
|
+
#ifdef DEBUG
|
185
|
+
abort();
|
186
|
+
#endif
|
187
|
+
exit(1);
|
188
|
+
}
|
189
|
+
|
190
|
+
if (strcmp(arg, "-cov_cutoff") == 0) {
|
191
|
+
if (strcmp(argv[arg_index], "auto") == 0) {
|
192
|
+
estimateCutoff = true;
|
193
|
+
} else {
|
194
|
+
sscanf(argv[arg_index], "%lf", &coverageCutoff);
|
195
|
+
}
|
196
|
+
} else if (strcmp(arg, "-long_cov_cutoff") == 0) {
|
197
|
+
sscanf(argv[arg_index], "%lf", &longCoverageCutoff);
|
198
|
+
} else if (strcmp(arg, "-exp_cov") == 0) {
|
199
|
+
if (strcmp(argv[arg_index], "auto") == 0) {
|
200
|
+
estimateCoverage = true;
|
201
|
+
readTracking = true;
|
202
|
+
} else {
|
203
|
+
sscanf(argv[arg_index], "%lf", &expectedCoverage);
|
204
|
+
if (expectedCoverage > 0)
|
205
|
+
readTracking = true;
|
206
|
+
}
|
207
|
+
} else if (strcmp(arg, "-ins_length") == 0) {
|
208
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
209
|
+
insertLength[0] = (Coordinate) longlong_var;
|
210
|
+
if (insertLength[0] < 0) {
|
211
|
+
velvetLog("Invalid insert length: %lli\n",
|
212
|
+
(long long) insertLength[0]);
|
213
|
+
#ifdef DEBUG
|
214
|
+
abort();
|
215
|
+
#endif
|
216
|
+
exit(1);
|
217
|
+
}
|
218
|
+
} else if (strcmp(arg, "-ins_length_sd") == 0) {
|
219
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
220
|
+
std_dev[0] = (Coordinate) longlong_var;
|
221
|
+
if (std_dev[0] < 0) {
|
222
|
+
velvetLog("Invalid std deviation: %lli\n",
|
223
|
+
(long long) std_dev[0]);
|
224
|
+
#ifdef DEBUG
|
225
|
+
abort();
|
226
|
+
#endif
|
227
|
+
exit(1);
|
228
|
+
}
|
229
|
+
} else if (strcmp(arg, "-ins_length_long") == 0) {
|
230
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
231
|
+
insertLengthLong = (Coordinate) longlong_var;
|
232
|
+
} else if (strcmp(arg, "-ins_length_long_sd") == 0) {
|
233
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
234
|
+
std_dev_long = (Coordinate) longlong_var;
|
235
|
+
} else if (strncmp(arg, "-ins_length", 11) == 0
|
236
|
+
&& strchr(arg, 'd') == NULL) {
|
237
|
+
sscanf(arg, "-ins_length%hi", &short_var);
|
238
|
+
cat = (Category) short_var;
|
239
|
+
if (cat < 1 || cat > CATEGORIES) {
|
240
|
+
velvetLog("Unknown option: %s\n", arg);
|
241
|
+
#ifdef DEBUG
|
242
|
+
abort();
|
243
|
+
#endif
|
244
|
+
exit(1);
|
245
|
+
}
|
246
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
247
|
+
insertLength[cat - 1] = (Coordinate) longlong_var;
|
248
|
+
if (insertLength[cat - 1] < 0) {
|
249
|
+
velvetLog("Invalid insert length: %lli\n",
|
250
|
+
(long long) insertLength[cat - 1]);
|
251
|
+
#ifdef DEBUG
|
252
|
+
abort();
|
253
|
+
#endif
|
254
|
+
exit(1);
|
255
|
+
}
|
256
|
+
} else if (strncmp(arg, "-ins_length", 11) == 0) {
|
257
|
+
sscanf(arg, "-ins_length%hi_sd", &short_var);
|
258
|
+
cat = (Category) short_var;
|
259
|
+
if (cat < 1 || cat > CATEGORIES) {
|
260
|
+
velvetLog("Unknown option: %s\n", arg);
|
261
|
+
#ifdef DEBUG
|
262
|
+
abort();
|
263
|
+
#endif
|
264
|
+
exit(1);
|
265
|
+
}
|
266
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
267
|
+
std_dev[cat - 1] = (Coordinate) longlong_var;
|
268
|
+
if (std_dev[cat - 1] < 0) {
|
269
|
+
velvetLog("Invalid std deviation: %lli\n",
|
270
|
+
(long long) std_dev[cat - 1]);
|
271
|
+
#ifdef DEBUG
|
272
|
+
abort();
|
273
|
+
#endif
|
274
|
+
exit(1);
|
275
|
+
}
|
276
|
+
} else if (strcmp(arg, "-read_trkg") == 0) {
|
277
|
+
readTracking =
|
278
|
+
(strcmp(argv[arg_index], "yes") == 0);
|
279
|
+
} else if (strcmp(arg, "-scaffolding") == 0) {
|
280
|
+
scaffolding =
|
281
|
+
(strcmp(argv[arg_index], "yes") == 0);
|
282
|
+
} else if (strcmp(arg, "-exportFiltered") == 0) {
|
283
|
+
exportFilteredNodes =
|
284
|
+
(strcmp(argv[arg_index], "yes") == 0);
|
285
|
+
} else if (strcmp(arg, "-amos_file") == 0) {
|
286
|
+
exportAssembly =
|
287
|
+
(strcmp(argv[arg_index], "yes") == 0);
|
288
|
+
} else if (strcmp(arg, "-alignments") == 0) {
|
289
|
+
exportAlignments =
|
290
|
+
(strcmp(argv[arg_index], "yes") == 0);
|
291
|
+
} else if (strcmp(arg, "-min_contig_lgth") == 0) {
|
292
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
293
|
+
minContigLength = (Coordinate) longlong_var;
|
294
|
+
} else if (strcmp(arg, "-coverage_mask") == 0) {
|
295
|
+
sscanf(argv[arg_index], "%lli", &longlong_var);
|
296
|
+
coverageMask = (IDnum) longlong_var;
|
297
|
+
} else if (strcmp(arg, "-accel_bits") == 0) {
|
298
|
+
sscanf(argv[arg_index], "%hi", &accelerationBits);
|
299
|
+
if (accelerationBits < 0) {
|
300
|
+
velvetLog
|
301
|
+
("Illegal acceleration parameter: %s\n",
|
302
|
+
argv[arg_index]);
|
303
|
+
printUsage();
|
304
|
+
return -1;
|
305
|
+
}
|
306
|
+
} else if (strcmp(arg, "-max_branch_length") == 0) {
|
307
|
+
sscanf(argv[arg_index], "%i", &arg_int);
|
308
|
+
setMaxReadLength(arg_int);
|
309
|
+
setLocalMaxReadLength(arg_int);
|
310
|
+
} else if (strcmp(arg, "-max_divergence") == 0) {
|
311
|
+
sscanf(argv[arg_index], "%lf", &arg_double);
|
312
|
+
setMaxDivergence(arg_double);
|
313
|
+
setLocalMaxDivergence(arg_double);
|
314
|
+
} else if (strcmp(arg, "-max_gap_count") == 0) {
|
315
|
+
sscanf(argv[arg_index], "%i", &arg_int);
|
316
|
+
setMaxGaps(arg_int);
|
317
|
+
setLocalMaxGaps(arg_int);
|
318
|
+
} else if (strcmp(arg, "-min_pair_count") == 0) {
|
319
|
+
sscanf(argv[arg_index], "%i", &arg_int);
|
320
|
+
setUnreliableConnectionCutoff(arg_int);
|
321
|
+
} else if (strcmp(arg, "-max_coverage") == 0) {
|
322
|
+
sscanf(argv[arg_index], "%lf", &maxCoverageCutoff);
|
323
|
+
} else if (strcmp(arg, "-long_mult_cutoff") == 0) {
|
324
|
+
sscanf(argv[arg_index], "%i", &arg_int);
|
325
|
+
setMultiplicityCutoff(arg_int);
|
326
|
+
} else if (strcmp(arg, "-paired_exp_fraction") == 0) {
|
327
|
+
sscanf(argv[arg_index], "%lf", &arg_double);
|
328
|
+
setPairedExpFraction(arg_double);
|
329
|
+
} else if (strcmp(arg, "-clean") == 0) {
|
330
|
+
if (strcmp(argv[arg_index], "yes") == 0)
|
331
|
+
clean = 1;
|
332
|
+
} else if (strcmp(arg, "-very_clean") == 0) {
|
333
|
+
if (strcmp(argv[arg_index], "yes") == 0)
|
334
|
+
clean = 2;
|
335
|
+
} else if (strcmp(arg, "-conserveLong") == 0) {
|
336
|
+
if (strcmp(argv[arg_index], "yes") == 0)
|
337
|
+
conserveLong = 2;
|
338
|
+
} else if (strcmp(arg, "-unused_reads") == 0) {
|
339
|
+
unusedReads =
|
340
|
+
(strcmp(argv[arg_index], "yes") == 0);
|
341
|
+
if (unusedReads)
|
342
|
+
readTracking = true;
|
343
|
+
} else if (strcmp(arg, "-shortMatePaired") == 0) {
|
344
|
+
shadows[0] = (strcmp(argv[arg_index], "yes") == 0);
|
345
|
+
} else if (strncmp(arg, "-shortMatePaired", 16) == 0) {
|
346
|
+
sscanf(arg, "-shortMatePaired%hi", &short_var);
|
347
|
+
cat = (Category) short_var;
|
348
|
+
if (cat < 1 || cat > CATEGORIES) {
|
349
|
+
velvetLog("Unknown option: %s\n", arg);
|
350
|
+
#ifdef DEBUG
|
351
|
+
abort();
|
352
|
+
#endif
|
353
|
+
exit(1);
|
354
|
+
}
|
355
|
+
shadows[cat - 1] = (strcmp(argv[arg_index], "yes") == 0);
|
356
|
+
} else if (strcmp(arg, "--help") == 0) {
|
357
|
+
printUsage();
|
358
|
+
return 0;
|
359
|
+
} else {
|
360
|
+
velvetLog("Unknown option: %s;\n", arg);
|
361
|
+
printUsage();
|
362
|
+
return 1;
|
363
|
+
}
|
364
|
+
}
|
365
|
+
|
366
|
+
// Bookkeeping
|
367
|
+
logInstructions(argc, argv, directory);
|
368
|
+
|
369
|
+
seqReadInfo = callocOrExit(1, SequencesReader);
|
370
|
+
strcpy(seqFilename, directory);
|
371
|
+
// if binary CnyUnifiedSeq exists, use it. Otherwise try Sequences
|
372
|
+
strcat(seqFilename, "/CnyUnifiedSeq");
|
373
|
+
if (access(seqFilename, R_OK) == 0) {
|
374
|
+
seqReadInfo->m_bIsBinary = true;
|
375
|
+
} else {
|
376
|
+
seqReadInfo->m_bIsBinary = false;
|
377
|
+
strcpy(seqFilename, directory);
|
378
|
+
strcat(seqFilename, "/Sequences");
|
379
|
+
}
|
380
|
+
seqReadInfo->m_seqFilename = seqFilename;
|
381
|
+
strcpy(roadmapFilename, directory);
|
382
|
+
strcat(roadmapFilename, "/Roadmaps");
|
383
|
+
|
384
|
+
strcpy(preGraphFilename, directory);
|
385
|
+
strcat(preGraphFilename, "/PreGraph");
|
386
|
+
|
387
|
+
strcpy(connectedGraphFilename, directory);
|
388
|
+
strcat(connectedGraphFilename, "/ConnectedGraph");
|
389
|
+
|
390
|
+
if (!readTracking) {
|
391
|
+
strcpy(graphFilename, directory);
|
392
|
+
strcat(graphFilename, "/Graph");
|
393
|
+
} else {
|
394
|
+
strcpy(graphFilename, directory);
|
395
|
+
strcat(graphFilename, "/Graph2");
|
396
|
+
}
|
397
|
+
|
398
|
+
strcpy(lowCovContigsFilename, directory);
|
399
|
+
strcat(lowCovContigsFilename, "/lowCoverageContigs.fa");
|
400
|
+
|
401
|
+
strcpy(highCovContigsFilename, directory);
|
402
|
+
strcat(highCovContigsFilename, "/highCoverageContigs.fa");
|
403
|
+
|
404
|
+
// Graph uploading or creation
|
405
|
+
if ((file = fopen(graphFilename, "r")) != NULL) {
|
406
|
+
fclose(file);
|
407
|
+
|
408
|
+
graph = importGraph(graphFilename);
|
409
|
+
|
410
|
+
} else if ((file = fopen(connectedGraphFilename, "r")) != NULL) {
|
411
|
+
fclose(file);
|
412
|
+
if (seqReadInfo->m_bIsBinary) {
|
413
|
+
|
414
|
+
sequences = importCnyReadSet(seqFilename);
|
415
|
+
|
416
|
+
#if 0
|
417
|
+
// compare to velvet's version of a seq
|
418
|
+
ReadSet *compareSequences = NULL;
|
419
|
+
compareSeqFilename = mallocOrExit(strlen(directory) + 100, char);
|
420
|
+
strcpy(compareSeqFilename, directory);
|
421
|
+
strcat(compareSeqFilename, "/Sequences");
|
422
|
+
compareSequences = importReadSet(compareSeqFilename);
|
423
|
+
convertSequences(compareSequences);
|
424
|
+
if (sequences->readCount != compareSequences->readCount) {
|
425
|
+
printf("read count mismatch\n");
|
426
|
+
exit(1);
|
427
|
+
}
|
428
|
+
int i;
|
429
|
+
for (i = 0; i < sequences->readCount; i++) {
|
430
|
+
TightString *tString = getTightStringInArray(sequences->tSequences, i);
|
431
|
+
TightString *tStringCmp = getTightStringInArray(compareSequences->tSequences, i);
|
432
|
+
if (getLength(tString) != getLength(tStringCmp)) {
|
433
|
+
printf("sequence %d len mismatch\n", i);
|
434
|
+
exit(1);
|
435
|
+
}
|
436
|
+
if (strcmp(readTightString(tString), readTightString(tStringCmp)) != 0) {
|
437
|
+
printf("sequence %d cmp mismatch\n", i);
|
438
|
+
printf("seq %s != cmp %s\n", readTightString(tString), readTightString(tStringCmp));
|
439
|
+
exit(1);
|
440
|
+
}
|
441
|
+
}
|
442
|
+
#endif
|
443
|
+
} else {
|
444
|
+
sequences = importReadSet(seqFilename);
|
445
|
+
convertSequences(sequences);
|
446
|
+
}
|
447
|
+
seqReadInfo->m_sequences = sequences;
|
448
|
+
|
449
|
+
graph =
|
450
|
+
importConnectedGraph(connectedGraphFilename, sequences,
|
451
|
+
roadmapFilename, readTracking, accelerationBits);
|
452
|
+
|
453
|
+
sequenceLengths =
|
454
|
+
getSequenceLengths(sequences, getWordLength(graph));
|
455
|
+
correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
|
456
|
+
exportGraph(graphFilename, graph, sequences->tSequences);
|
457
|
+
} else if ((file = fopen(preGraphFilename, "r")) != NULL) {
|
458
|
+
fclose(file);
|
459
|
+
if (seqReadInfo->m_bIsBinary) {
|
460
|
+
sequences = importCnyReadSet(seqFilename);
|
461
|
+
} else {
|
462
|
+
sequences = importReadSet(seqFilename);
|
463
|
+
convertSequences(sequences);
|
464
|
+
}
|
465
|
+
seqReadInfo->m_sequences = sequences;
|
466
|
+
graph =
|
467
|
+
importPreGraph(preGraphFilename, sequences,
|
468
|
+
roadmapFilename, readTracking, accelerationBits);
|
469
|
+
sequenceLengths =
|
470
|
+
getSequenceLengths(sequences, getWordLength(graph));
|
471
|
+
correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
|
472
|
+
exportGraph(graphFilename, graph, sequences->tSequences);
|
473
|
+
} else if ((file = fopen(roadmapFilename, "r")) != NULL) {
|
474
|
+
fclose(file);
|
475
|
+
|
476
|
+
rdmaps = importRoadMapArray(roadmapFilename);
|
477
|
+
if (seqReadInfo->m_bIsBinary) {
|
478
|
+
// pull in sequences first and use in preGraph
|
479
|
+
sequences = importCnyReadSet(seqFilename);
|
480
|
+
seqReadInfo->m_sequences = sequences;
|
481
|
+
#if 0
|
482
|
+
// compare to velvet's version of a seq
|
483
|
+
ReadSet *compareSequences = NULL;
|
484
|
+
char *compareSeqFilename = mallocOrExit(strlen(directory) + 100, char);
|
485
|
+
strcpy(compareSeqFilename, directory);
|
486
|
+
strcat(compareSeqFilename, "/Sequences");
|
487
|
+
compareSequences = importReadSet(compareSeqFilename);
|
488
|
+
convertSequences(compareSequences);
|
489
|
+
if (sequences->readCount != compareSequences->readCount) {
|
490
|
+
printf("read count mismatch\n");
|
491
|
+
exit(1);
|
492
|
+
}
|
493
|
+
int i;
|
494
|
+
for (i = 0; i < sequences->readCount; i++) {
|
495
|
+
TightString *tString = getTightStringInArray(sequences->tSequences, i);
|
496
|
+
TightString *tStringCmp = getTightStringInArray(compareSequences->tSequences, i);
|
497
|
+
if (getLength(tString) != getLength(tStringCmp)) {
|
498
|
+
printf("sequence %d len mismatch\n", i);
|
499
|
+
exit(1);
|
500
|
+
}
|
501
|
+
if (strcmp(readTightString(tString), readTightString(tStringCmp)) != 0) {
|
502
|
+
printf("sequence %d cmp mismatch\n", i);
|
503
|
+
printf("seq %s != cmp %s\n", readTightString(tString), readTightString(tStringCmp));
|
504
|
+
exit(1);
|
505
|
+
}
|
506
|
+
}
|
507
|
+
printf("sequence files match!\n");
|
508
|
+
#endif
|
509
|
+
}
|
510
|
+
preGraph = newPreGraph_pg(rdmaps, seqReadInfo);
|
511
|
+
concatenatePreGraph_pg(preGraph);
|
512
|
+
if (!conserveLong)
|
513
|
+
clipTips_pg(preGraph);
|
514
|
+
exportPreGraph_pg(preGraphFilename, preGraph);
|
515
|
+
destroyPreGraph_pg(preGraph);
|
516
|
+
if (!seqReadInfo->m_bIsBinary) {
|
517
|
+
sequences = importReadSet(seqFilename);
|
518
|
+
convertSequences(sequences);
|
519
|
+
seqReadInfo->m_sequences = sequences;
|
520
|
+
}
|
521
|
+
graph =
|
522
|
+
importPreGraph(preGraphFilename, sequences,
|
523
|
+
roadmapFilename, readTracking, accelerationBits);
|
524
|
+
sequenceLengths =
|
525
|
+
getSequenceLengths(sequences, getWordLength(graph));
|
526
|
+
correctGraph(graph, sequenceLengths, sequences->categories, conserveLong);
|
527
|
+
exportGraph(graphFilename, graph, sequences->tSequences);
|
528
|
+
} else {
|
529
|
+
velvetLog("No Roadmap file to build upon! Please run velveth (see manual)\n");
|
530
|
+
#ifdef DEBUG
|
531
|
+
abort();
|
532
|
+
#endif
|
533
|
+
exit(1);
|
534
|
+
}
|
535
|
+
|
536
|
+
// Set insert lengths and their standard deviations
|
537
|
+
for (cat = 0; cat < CATEGORIES; cat++) {
|
538
|
+
if (insertLength[cat] > -1 && std_dev[cat] < 0)
|
539
|
+
std_dev[cat] = insertLength[cat] / 10;
|
540
|
+
setInsertLengths(graph, cat,
|
541
|
+
insertLength[cat], std_dev[cat]);
|
542
|
+
}
|
543
|
+
|
544
|
+
if (insertLengthLong > -1 && std_dev_long < 0)
|
545
|
+
std_dev_long = insertLengthLong / 10;
|
546
|
+
setInsertLengths(graph, CATEGORIES,
|
547
|
+
insertLengthLong, std_dev_long);
|
548
|
+
|
549
|
+
// Coverage cutoff
|
550
|
+
if (expectedCoverage < 0 && estimateCoverage == true) {
|
551
|
+
expectedCoverage = estimated_cov(graph, directory);
|
552
|
+
if (coverageCutoff < 0) {
|
553
|
+
coverageCutoff = expectedCoverage / 2;
|
554
|
+
estimateCutoff = true;
|
555
|
+
}
|
556
|
+
} else {
|
557
|
+
estimateCoverage = false;
|
558
|
+
if (coverageCutoff < 0 && estimateCutoff)
|
559
|
+
coverageCutoff = estimated_cov(graph, directory) / 2;
|
560
|
+
else
|
561
|
+
estimateCutoff = false;
|
562
|
+
}
|
563
|
+
|
564
|
+
if (coverageCutoff < 0) {
|
565
|
+
velvetLog("WARNING: NO COVERAGE CUTOFF PROVIDED\n");
|
566
|
+
velvetLog("Velvet will probably leave behind many detectable errors\n");
|
567
|
+
velvetLog("See manual for instructions on how to set the coverage cutoff parameter\n");
|
568
|
+
}
|
569
|
+
|
570
|
+
if (sequences == NULL) {
|
571
|
+
if (seqReadInfo->m_bIsBinary) {
|
572
|
+
sequences = importCnyReadSet(seqFilename);
|
573
|
+
} else {
|
574
|
+
sequences = importReadSet(seqFilename);
|
575
|
+
convertSequences(sequences);
|
576
|
+
}
|
577
|
+
seqReadInfo->m_sequences = sequences;
|
578
|
+
}
|
579
|
+
|
580
|
+
if (minContigLength < 2 * getWordLength(graph))
|
581
|
+
minContigKmerLength = getWordLength(graph);
|
582
|
+
else
|
583
|
+
minContigKmerLength = minContigLength - getWordLength(graph) + 1;
|
584
|
+
|
585
|
+
dubious =
|
586
|
+
removeLowCoverageNodesAndDenounceDubiousReads(graph,
|
587
|
+
coverageCutoff,
|
588
|
+
sequences,
|
589
|
+
exportFilteredNodes,
|
590
|
+
minContigKmerLength,
|
591
|
+
lowCovContigsFilename);
|
592
|
+
|
593
|
+
removeLowLongCoverageNodesAndDenounceDubiousReads(graph,
|
594
|
+
longCoverageCutoff,
|
595
|
+
sequences,
|
596
|
+
dubious,
|
597
|
+
exportFilteredNodes,
|
598
|
+
minContigKmerLength,
|
599
|
+
lowCovContigsFilename);
|
600
|
+
|
601
|
+
removeHighCoverageNodes(graph, maxCoverageCutoff, exportFilteredNodes, minContigKmerLength, highCovContigsFilename);
|
602
|
+
clipTipsHard(graph, conserveLong);
|
603
|
+
|
604
|
+
if (sequences->readCount > 0 && sequences->categories[0] == REFERENCE)
|
605
|
+
removeLowArcs(graph, coverageCutoff);
|
606
|
+
|
607
|
+
if (expectedCoverage > 0) {
|
608
|
+
|
609
|
+
// Mixed length sequencing
|
610
|
+
readCoherentGraph(graph, isUniqueSolexa, expectedCoverage,
|
611
|
+
sequences);
|
612
|
+
|
613
|
+
// Paired end resolution
|
614
|
+
createReadPairingArray(sequences);
|
615
|
+
pebbleRounds += pairedCategories(sequences);
|
616
|
+
detachDubiousReads(sequences, dubious);
|
617
|
+
activateGapMarkers(graph);
|
618
|
+
|
619
|
+
for ( ;pebbleRounds > 0; pebbleRounds--)
|
620
|
+
exploitShortReadPairs(graph, sequences, dubious, shadows, scaffolding);
|
621
|
+
|
622
|
+
} else {
|
623
|
+
velvetLog("WARNING: NO EXPECTED COVERAGE PROVIDED\n");
|
624
|
+
velvetLog("Velvet will be unable to resolve any repeats\n");
|
625
|
+
velvetLog("See manual for instructions on how to set the expected coverage parameter\n");
|
626
|
+
}
|
627
|
+
|
628
|
+
if (dubious)
|
629
|
+
free(dubious);
|
630
|
+
|
631
|
+
concatenateGraph(graph);
|
632
|
+
|
633
|
+
removeLowCoverageReferenceNodes(graph, coverageCutoff, longCoverageCutoff, sequences);
|
634
|
+
|
635
|
+
strcpy(graphFilename, directory);
|
636
|
+
strcat(graphFilename, "/contigs.fa");
|
637
|
+
sequenceLengths = getSequenceLengths(sequences, getWordLength(graph));
|
638
|
+
exportLongNodeSequences(graphFilename, graph, minContigKmerLength, sequences, sequenceLengths, coverageMask);
|
639
|
+
|
640
|
+
if (exportAlignments) {
|
641
|
+
strcpy(graphFilename, directory);
|
642
|
+
strcat(graphFilename, "/contig-alignments.psa");
|
643
|
+
exportLongNodeMappings(graphFilename, graph, sequences,
|
644
|
+
minContigKmerLength, seqReadInfo);
|
645
|
+
}
|
646
|
+
|
647
|
+
strcpy(graphFilename, directory);
|
648
|
+
strcat(graphFilename, "/stats.txt");
|
649
|
+
displayGeneralStatistics(graph, graphFilename, sequences);
|
650
|
+
|
651
|
+
if (clean == 0) {
|
652
|
+
strcpy(graphFilename, directory);
|
653
|
+
strcat(graphFilename, "/LastGraph");
|
654
|
+
exportGraph(graphFilename, graph, sequences->tSequences);
|
655
|
+
}
|
656
|
+
|
657
|
+
if (exportAssembly) {
|
658
|
+
strcpy(graphFilename, directory);
|
659
|
+
strcat(graphFilename, "/velvet_asm.afg");
|
660
|
+
exportAMOSContigs(graphFilename, graph, minContigKmerLength, sequences);
|
661
|
+
}
|
662
|
+
|
663
|
+
if (unusedReads)
|
664
|
+
exportUnusedReads(graph, sequences, minContigKmerLength, directory);
|
665
|
+
|
666
|
+
if (estimateCoverage)
|
667
|
+
velvetLog("Estimated Coverage = %f\n", expectedCoverage);
|
668
|
+
if (estimateCutoff)
|
669
|
+
velvetLog("Estimated Coverage cutoff = %f\n", coverageCutoff);
|
670
|
+
|
671
|
+
logFinalStats(graph, minContigKmerLength, directory);
|
672
|
+
|
673
|
+
if (clean > 0) {
|
674
|
+
strcpy(graphFilename, directory);
|
675
|
+
strcat(graphFilename, "/Roadmaps");
|
676
|
+
remove(graphFilename);
|
677
|
+
|
678
|
+
strcpy(graphFilename, directory);
|
679
|
+
strcat(graphFilename, "/LastGraph");
|
680
|
+
remove(graphFilename);
|
681
|
+
}
|
682
|
+
|
683
|
+
if (clean > 1) {
|
684
|
+
strcpy(graphFilename, directory);
|
685
|
+
strcat(graphFilename, "/Sequences");
|
686
|
+
remove(graphFilename);
|
687
|
+
|
688
|
+
strcpy(graphFilename, directory);
|
689
|
+
strcat(graphFilename, "/Graph2");
|
690
|
+
remove(graphFilename);
|
691
|
+
|
692
|
+
strcpy(graphFilename, directory);
|
693
|
+
strcat(graphFilename, "/Graph");
|
694
|
+
remove(graphFilename);
|
695
|
+
}
|
696
|
+
|
697
|
+
free(sequenceLengths);
|
698
|
+
destroyGraph(graph);
|
699
|
+
free(graphFilename);
|
700
|
+
free(connectedGraphFilename);
|
701
|
+
free(preGraphFilename);
|
702
|
+
free(seqFilename);
|
703
|
+
free(roadmapFilename);
|
704
|
+
free(lowCovContigsFilename);
|
705
|
+
free(highCovContigsFilename);
|
706
|
+
destroyReadSet(sequences);
|
707
|
+
if (seqReadInfo) {
|
708
|
+
free(seqReadInfo);
|
709
|
+
}
|
710
|
+
|
711
|
+
return 0;
|
712
|
+
}
|