coralsnake 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coralsnake-0.0.1/PKG-INFO +19 -0
- coralsnake-0.0.1/README.md +2 -0
- coralsnake-0.0.1/build.py +108 -0
- coralsnake-0.0.1/coralsnake/.gitignore +1 -0
- coralsnake-0.0.1/coralsnake/__init__.py +0 -0
- coralsnake-0.0.1/coralsnake/cli.py +148 -0
- coralsnake-0.0.1/coralsnake/conversion.py +47 -0
- coralsnake-0.0.1/coralsnake/debug.py +39 -0
- coralsnake-0.0.1/coralsnake/gtf2tx.py +189 -0
- coralsnake-0.0.1/coralsnake/mapping.py +316 -0
- coralsnake-0.0.1/coralsnake/minimap2/.git +1 -0
- coralsnake-0.0.1/coralsnake/minimap2/.github/workflows/ci.yaml +21 -0
- coralsnake-0.0.1/coralsnake/minimap2/.gitignore +8 -0
- coralsnake-0.0.1/coralsnake/minimap2/.gitmodules +3 -0
- coralsnake-0.0.1/coralsnake/minimap2/FAQ.md +46 -0
- coralsnake-0.0.1/coralsnake/minimap2/LICENSE.txt +24 -0
- coralsnake-0.0.1/coralsnake/minimap2/MANIFEST.in +10 -0
- coralsnake-0.0.1/coralsnake/minimap2/Makefile +136 -0
- coralsnake-0.0.1/coralsnake/minimap2/Makefile.simde +97 -0
- coralsnake-0.0.1/coralsnake/minimap2/NEWS.md +935 -0
- coralsnake-0.0.1/coralsnake/minimap2/README.md +412 -0
- coralsnake-0.0.1/coralsnake/minimap2/align.c +1036 -0
- coralsnake-0.0.1/coralsnake/minimap2/bseq.c +169 -0
- coralsnake-0.0.1/coralsnake/minimap2/bseq.h +64 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/__init__.py +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/__pycache__/__init__.cpython-310.pyc +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/lib.macosx-14-x86_64-cpython-310/mappy.cpython-310-darwin.so +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/align.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/bseq.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/esterr.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/format.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/hit.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/index.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/kalloc.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/ksw2_extd2_sse.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/ksw2_exts2_sse.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/ksw2_extz2_sse.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/ksw2_ll_sse.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/kthread.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/lchain.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/map.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/misc.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/options.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/pe.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/python/mappy.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/sdust.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/seed.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/sketch.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/build/temp.macosx-14-x86_64-cpython-310/splitidx.o +0 -0
- coralsnake-0.0.1/coralsnake/minimap2/code_of_conduct.md +30 -0
- coralsnake-0.0.1/coralsnake/minimap2/cookbook.md +243 -0
- coralsnake-0.0.1/coralsnake/minimap2/esterr.c +64 -0
- coralsnake-0.0.1/coralsnake/minimap2/example.c +63 -0
- coralsnake-0.0.1/coralsnake/minimap2/format.c +628 -0
- coralsnake-0.0.1/coralsnake/minimap2/hit.c +466 -0
- coralsnake-0.0.1/coralsnake/minimap2/index.c +776 -0
- coralsnake-0.0.1/coralsnake/minimap2/kalloc.c +224 -0
- coralsnake-0.0.1/coralsnake/minimap2/kalloc.h +87 -0
- coralsnake-0.0.1/coralsnake/minimap2/kdq.h +132 -0
- coralsnake-0.0.1/coralsnake/minimap2/ketopt.h +120 -0
- coralsnake-0.0.1/coralsnake/minimap2/khash.h +615 -0
- coralsnake-0.0.1/coralsnake/minimap2/krmq.h +474 -0
- coralsnake-0.0.1/coralsnake/minimap2/kseq.h +256 -0
- coralsnake-0.0.1/coralsnake/minimap2/ksort.h +153 -0
- coralsnake-0.0.1/coralsnake/minimap2/ksw2.h +185 -0
- coralsnake-0.0.1/coralsnake/minimap2/ksw2_dispatch.c +96 -0
- coralsnake-0.0.1/coralsnake/minimap2/ksw2_extd2_sse.c +402 -0
- coralsnake-0.0.1/coralsnake/minimap2/ksw2_exts2_sse.c +455 -0
- coralsnake-0.0.1/coralsnake/minimap2/ksw2_extz2_sse.c +313 -0
- coralsnake-0.0.1/coralsnake/minimap2/ksw2_ll_sse.c +152 -0
- coralsnake-0.0.1/coralsnake/minimap2/kthread.c +159 -0
- coralsnake-0.0.1/coralsnake/minimap2/kthread.h +15 -0
- coralsnake-0.0.1/coralsnake/minimap2/kvec.h +105 -0
- coralsnake-0.0.1/coralsnake/minimap2/lchain.c +368 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.appveyor.yml +29 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.azure-pipelines.yml +42 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.drone.star +290 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.editorconfig +18 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.git +1 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.github/workflows/ci.yml +38 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.gitignore +1 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.gitmodules +3 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/.travis.yml +296 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/CONTRIBUTING.md +114 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/COPYING +20 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/README.md +333 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/amalgamate.py +58 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/meson.build +33 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/netlify.toml +20 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/arm/neon.h +97 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/check.h +267 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/debug-trap.h +83 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/hedley.h +1899 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/simde-arch.h +445 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/simde-common.h +697 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/avx.h +5385 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/fma.h +659 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/sse.h +3696 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/sse3.h +343 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/simde/x86/svml.h +543 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/.gitignore +4 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/CMakeLists.txt +166 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/meson.build +4 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/test-arm.c +20 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/arm/test-arm.h +8 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/meson.build +64 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/.appveyor.yml +34 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/.dir-locals.el +2 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/.git +1 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/.gitignore +6 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/.travis.yml +149 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/COPYING +21 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/Makefile +55 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/README.md +54 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/example.c +351 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/meson.build +37 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/munit.c +2055 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/munit/munit.h +535 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/run-tests.c +20 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/run-tests.h +260 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/avx.c +13752 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/avx2.c +9977 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/fma.c +2557 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/meson.build +33 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/mmx.c +2878 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/skel.c +2984 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/sse.c +5121 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/sse2.c +9860 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/sse3.c +486 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/svml.c +1545 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-avx.h +16 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-sse.h +13 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-x86.c +48 -0
- coralsnake-0.0.1/coralsnake/minimap2/lib/simde/test/x86/test-x86.h +8 -0
- coralsnake-0.0.1/coralsnake/minimap2/main.c +474 -0
- coralsnake-0.0.1/coralsnake/minimap2/map.c +709 -0
- coralsnake-0.0.1/coralsnake/minimap2/minimap.h +422 -0
- coralsnake-0.0.1/coralsnake/minimap2/minimap2.1 +788 -0
- coralsnake-0.0.1/coralsnake/minimap2/misc/README.md +179 -0
- coralsnake-0.0.1/coralsnake/minimap2/misc/mmphase.js +335 -0
- coralsnake-0.0.1/coralsnake/minimap2/misc/paftools.js +3723 -0
- coralsnake-0.0.1/coralsnake/minimap2/misc.c +162 -0
- coralsnake-0.0.1/coralsnake/minimap2/mmpriv.h +131 -0
- coralsnake-0.0.1/coralsnake/minimap2/options.c +252 -0
- coralsnake-0.0.1/coralsnake/minimap2/pe.c +177 -0
- coralsnake-0.0.1/coralsnake/minimap2/pyproject.toml +2 -0
- coralsnake-0.0.1/coralsnake/minimap2/python/README.rst +198 -0
- coralsnake-0.0.1/coralsnake/minimap2/python/cmappy.h +152 -0
- coralsnake-0.0.1/coralsnake/minimap2/python/cmappy.pxd +154 -0
- coralsnake-0.0.1/coralsnake/minimap2/python/mappy.c +20901 -0
- coralsnake-0.0.1/coralsnake/minimap2/python/mappy.pyx +278 -0
- coralsnake-0.0.1/coralsnake/minimap2/python/minimap2.py +41 -0
- coralsnake-0.0.1/coralsnake/minimap2/sdust.c +213 -0
- coralsnake-0.0.1/coralsnake/minimap2/sdust.h +25 -0
- coralsnake-0.0.1/coralsnake/minimap2/seed.c +132 -0
- coralsnake-0.0.1/coralsnake/minimap2/setup.py +55 -0
- coralsnake-0.0.1/coralsnake/minimap2/sketch.c +143 -0
- coralsnake-0.0.1/coralsnake/minimap2/splitidx.c +84 -0
- coralsnake-0.0.1/coralsnake/minimap2/sse2neon/emmintrin.h +1689 -0
- coralsnake-0.0.1/coralsnake/minimap2/test/MT-human.fa +278 -0
- coralsnake-0.0.1/coralsnake/minimap2/test/MT-orang.fa +276 -0
- coralsnake-0.0.1/coralsnake/minimap2/test/q-inv.fa +4 -0
- coralsnake-0.0.1/coralsnake/minimap2/test/q2.fa +2 -0
- coralsnake-0.0.1/coralsnake/minimap2/test/t-inv.fa +127 -0
- coralsnake-0.0.1/coralsnake/minimap2/test/t2.fa +2 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/Makefile +21 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/bioinfo.cls +930 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/blasr-mc.eval +17 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/bowtie2-s3.sam.eval +28 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/bwa-s3.sam.eval +52 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/bwa.eval +55 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/eval2roc.pl +33 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/graphmap.eval +4 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/hs38-simu.sh +10 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/minialign.eval +49 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/minimap2.bib +460 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/minimap2.tex +724 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/mm2-s3.sam.eval +62 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/mm2-update.tex +240 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/mm2.approx.eval +12 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/mm2.eval +13 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/natbib.bst +1288 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/natbib.sty +803 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/ngmlr.eval +38 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/roc.gp +60 -0
- coralsnake-0.0.1/coralsnake/minimap2/tex/snap-s3.sam.eval +62 -0
- coralsnake-0.0.1/coralsnake/run.py +77 -0
- coralsnake-0.0.1/coralsnake/run.smk +343 -0
- coralsnake-0.0.1/coralsnake/tbam2gbam.py +260 -0
- coralsnake-0.0.1/pyproject.toml +24 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: coralsnake
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary:
|
|
5
|
+
Author: Chang Ye
|
|
6
|
+
Author-email: yech1990@gmail.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Requires-Dist: dnaio (>=1.2.1,<2.0.0)
|
|
13
|
+
Requires-Dist: pyfaidx (>=0.8.1.1,<0.9.0.0)
|
|
14
|
+
Requires-Dist: rich-click (>=1.8.3,<2.0.0)
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# Coralsnake
|
|
18
|
+
|
|
19
|
+
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import platform
|
|
3
|
+
import shutil
|
|
4
|
+
from distutils.command.build_ext import build_ext
|
|
5
|
+
from distutils.core import Distribution, Extension
|
|
6
|
+
|
|
7
|
+
from Cython.Build import cythonize
|
|
8
|
+
|
|
9
|
+
# Root of the vendored minimap2 sources, relative to the directory the build
# is started from.
minimap2_base = "coralsnake/minimap2"

# Compiler flags and header search paths common to every architecture.
extra_compile_args = ["-DHAVE_KALLOC", "-O3"]
include_dirs = [minimap2_base]

# Architecture-specific additions: anything that is not ARM64 is compiled
# with SSE4.1 enabled (note: ancient x86_64 CPUs don't have SSE4), while
# ARM64 gets the bundled sse2neon include directory and SSE2-only defines.
if platform.machine() not in ("aarch64", "arm64"):
    extra_compile_args += ["-msse4.1"]
else:
    include_dirs += [os.path.join(minimap2_base, "sse2neon")]
    extra_compile_args += ["-ftree-vectorize", "-DKSW_SSE2_ONLY", "-D__SSE2__"]

# System libraries the extension is linked against.
libraries = ["z", "m", "pthread"]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def build():
    """Cythonize and compile the ``mappy`` extension into ``coralsnake/``.

    Source and header paths are given relative to ``minimap2_base``; the
    compiler flags, include directories and libraries come from the
    module-level configuration. After the build, every produced file gets
    its execute bits switched on wherever the matching read bit is set.
    """

    def vendored(relative_paths):
        # Prefix each path with the vendored minimap2 directory.
        return [os.path.join(minimap2_base, rel) for rel in relative_paths]

    mappy_extension = Extension(
        "mappy",
        sources=vendored(
            [
                "python/mappy.pyx",
                "align.c",
                "bseq.c",
                "lchain.c",
                "seed.c",
                "format.c",
                "hit.c",
                "index.c",
                "pe.c",
                "options.c",
                "ksw2_extd2_sse.c",
                "ksw2_exts2_sse.c",
                "ksw2_extz2_sse.c",
                "ksw2_ll_sse.c",
                "kalloc.c",
                "kthread.c",
                "map.c",
                "misc.c",
                "sdust.c",
                "sketch.c",
                "esterr.c",
                "splitidx.c",
            ]
        ),
        depends=vendored(
            [
                "minimap.h",
                "bseq.h",
                "kalloc.h",
                "kdq.h",
                "khash.h",
                "kseq.h",
                "ksort.h",
                "ksw2.h",
                "kthread.h",
                "kvec.h",
                "mmpriv.h",
                "sdust.h",
                "python/cmappy.h",
                "python/cmappy.pxd",
            ]
        ),
        extra_compile_args=extra_compile_args,
        include_dirs=include_dirs,
        libraries=libraries,
    )
    ext_modules = cythonize(
        [mappy_extension],
        include_path=include_dirs,
        compiler_directives={"binding": True, "language_level": 3},
    )

    distribution = Distribution({"name": "coralsnake", "ext_modules": ext_modules})
    distribution.package_dir = "coralsnake"

    # Run build_ext by hand so the compiled module is written straight into
    # the coralsnake directory.
    builder = build_ext(distribution)
    builder.build_lib = "coralsnake"
    builder.ensure_finalized()
    builder.run()

    for built_file in builder.get_outputs():
        current_mode = os.stat(built_file).st_mode
        # Shifting the read bits (0o444) right by two lands them on the
        # execute bits (0o111): each class that may read may also execute.
        readable_bits = current_mode & 0o444
        os.chmod(built_file, current_mode | (readable_bits >> 2))


if __name__ == "__main__":
    build()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mappy.*.so
|
|
File without changes
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import rich_click as click
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
@click.group(
    invoke_without_command=False,
    help="Variant (genomic variant analysis in python)",
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.version_option(None, "-v", "--version")
@click.pass_context
def cli(ctx):
    """Root command group; the subcommands below are registered on it."""
    # Nothing to do here: the group only dispatches to its subcommands.
    return None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@cli.command(
    help="Extract primary transcript from gtf/gff file.",
    no_args_is_help=True,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option("--gtf-file", "-g", "gtf_file", help="GTF file.", required=True)
@click.option("--fasta-file", "-f", "fasta_file", help="Fasta file.", required=True)
@click.option("--output-file", "-o", "output_file", help="Output file.", required=True)
@click.option("--seq-file", "-s", "seq_file", help="Sequence file.", required=True)
def extract(gtf_file, fasta_file, output_file, seq_file):
    """Forward the four file paths to ``gtf2tx.parse_file``."""
    # Imported on demand so the module is only loaded when this subcommand
    # actually runs.
    from . import gtf2tx

    gtf2tx.parse_file(gtf_file, fasta_file, output_file, seq_file)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _parse_npad(npad):
    """Turn the ``--npad`` string into ``(left, right)`` pad sizes.

    A single number is used for both sides; ``"L,R"`` sets them separately.
    Returns ``None`` when the value is not one or two plain decimal numbers.
    """
    fields = npad.split(",")
    if len(fields) == 1:
        fields = fields * 2
    # isdecimal() only accepts characters int() can convert, and the length
    # check rejects inputs such as "1,2,3" instead of failing on unpacking.
    if len(fields) != 2 or not all(field.isdecimal() for field in fields):
        return None
    left, right = (int(field) for field in fields)
    return left, right


@cli.command(
    help="Fetch genomic motif.",
    no_args_is_help=True,
    context_settings=dict(help_option_names=["-h", "--help"]),
)
@click.option(
    "--input",
    "-i",
    "input",
    default="-",
    help="Input position file.",
    required=False,
)
@click.option(
    "--output",
    "-o",
    "output",
    default="-",
    help="Output annotation file.",
    required=False,
)
@click.option(
    "--fasta",
    "-f",
    "fasta",
    help="reference fasta file.",
    required=True,
)
@click.option(
    "--npad",
    "-n",
    "npad",
    default="10",
    help="Number of padding base to call motif. "
    "If you want to set different left and right pads, "
    "use comma to separate them. (eg. 2,3)",
)
@click.option(
    "--with-header", "-H", help="With header line in input file.", is_flag=True
)
@click.option(
    "--columns",
    "-c",
    "columns",
    default="1,2,3",
    show_default=True,
    type=str,
    help="Sets columns for site info. (Chrom,Pos,Strand)",
)
@click.option("--to-upper", "-u", help="Convert motif to upper case.", is_flag=True)
@click.option("--wrap-site", "-w", help="Wrap motif site.", is_flag=True)
def motif(input, output, fasta, npad, with_header, columns, to_upper, wrap_site):
    """Validate ``--npad`` and hand all options to ``motif.run_motif``.

    Exits with status 1, after printing an error to stderr, when ``npad`` is
    not a single non-negative integer or two of them separated by a comma.
    """
    # NOTE(review): the sdist file list contains no coralsnake/motif.py, so
    # this import looks like it fails at runtime; confirm the module ships.
    from .motif import run_motif

    pads = _parse_npad(npad)
    if pads is None:
        click.echo(f"Error: npad should be positive integer, not {npad}", err=True)
        # SystemExit does not depend on the `exit` helper installed by `site`.
        raise SystemExit(1)
    lpad, rpad = pads
    run_motif(
        input,
        output,
        fasta,
        lpad,
        rpad,
        with_header,
        columns,
        to_upper,
        wrap_site,
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@cli.command(
    help="Map reads to reference genome.",
    no_args_is_help=True,
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option("-r", "--ref-file", required=True, help="reference file")
@click.option("-1", "--r1-file", required=True, help="r1 file")
@click.option("-2", "--r2-file", required=True, help="r2 file")
@click.option("-f", "--fwd-lib", is_flag=True, help="forward library")
def map(ref_file, r1_file, r2_file, fwd_lib):
    """Forward the reference, both read files and the flag to ``mapping.map_file``."""
    # Imported on demand so the mapping module is only loaded when this
    # subcommand actually runs.
    from . import mapping

    mapping.map_file(ref_file, r1_file, r2_file, fwd_lib)


# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
#
|
|
4
|
+
# Copyright © 2023 Ye Chang yech1990@gmail.com
|
|
5
|
+
# Distributed under terms of the GNU license.
|
|
6
|
+
#
|
|
7
|
+
# Created: 2023-01-30 15:55
|
|
8
|
+
|
|
9
|
+
"""convert A->G, C->T in DNA sequence."""
|
|
10
|
+
|
|
11
|
+
import sys
|
|
12
|
+
|
|
13
|
+
import dnaio
|
|
14
|
+
|
|
15
|
+
# Translation table for str.translate: A->G and C->T in both cases, while
# G and T map to themselves.
MK_BASE_MAPPER = {ord(src): ord(dst) for src, dst in zip("ACGTacgt", "GTGTgtgt")}
# Translation table for str.translate: G->A and T->C in both cases, while
# A and C map to themselves.
KM_BASE_MAPPER = {ord(src): ord(dst) for src, dst in zip("ACGTacgt", "ACACacac")}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def mk_conversion(seq):
    """Return *seq* translated through ``MK_BASE_MAPPER`` (A->G, C->T)."""
    converted = seq.translate(MK_BASE_MAPPER)
    return converted
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def km_conversion(seq):
    """Return *seq* translated through ``KM_BASE_MAPPER`` (G->A, T->C)."""
    converted = seq.translate(KM_BASE_MAPPER)
    return converted
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def convert_file(input_file, output_MK_file, output_KM_file):
    """Write an MK-converted and a KM-converted copy of every input read.

    Each output record keeps the original qualities. Its name is the first
    whitespace-separated token of the input name, followed by ``YS:Z:`` with
    the unconverted sequence and an ``ST:A:`` marker (``+`` in the MK file,
    ``-`` in the KM file).
    """
    with (
        dnaio.open(input_file, mode="r") as fi,
        dnaio.open(output_MK_file, mode="w") as fo_mk,
        dnaio.open(output_KM_file, mode="w") as fo_km,
    ):
        # (writer, conversion, strand marker) for the two outputs, in the
        # order they are written for each read.
        targets = (
            (fo_mk, mk_conversion, "+"),
            (fo_km, km_conversion, "-"),
        )
        for read in fi:
            read_id = read.name.split()[0]
            for writer, convert, strand in targets:
                writer.write(
                    dnaio.SequenceRecord(
                        name=f"{read_id} YS:Z:{read.sequence}\tST:A:{strand}",
                        sequence=convert(read.sequence),
                        qualities=read.qualities,
                    )
                )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
#
|
|
4
|
+
# Copyright © 2024 Ye Chang yech1990@gmail.com
|
|
5
|
+
# Distributed under terms of the GNU license.
|
|
6
|
+
#
|
|
7
|
+
# Created: 2024-06-08 20:32
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
import mappy as mp
|
|
13
|
+
|
|
14
|
+
# Example read pair (described as "on forward strand" by the original note).
seq1 = "TCGGGTTGCTTGGGAATGCAGCCCAAAGCGGGT"
seq2 = "AGTTTACCACCCGCTTTGGGCTGCATTCCCAAGCA"

idx = mp.Aligner(fn_idx_in="./debug.fa", preset="sr")


def _print_hits(mate):
    """Map ``seq1`` paired with *mate* and print every hit, then a separator."""
    # Printed hit columns, per the original note:
    # q_st  q_en  strand  ctg  ctg_len  r_st  r_en  mlen  blen  mapq  cg:Z:cigar_str
    for hit in idx.map(seq1, seq2=mate, cs=True, MD=True):
        print(hit)
    print(".......")


# second read as given
_print_hits(seq2)
# second read reversed
_print_hits(seq2[::-1])
# second read complemented (reverse complement, reversed back)
_print_hits(mp.revcomp(seq2)[::-1])
# second read reverse-complemented
_print_hits(mp.revcomp(seq2))
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
#
|
|
4
|
+
# Copyright © 2024 Ye Chang yech1990@gmail.com
|
|
5
|
+
# Distributed under terms of the GNU license.
|
|
6
|
+
#
|
|
7
|
+
# Created: 2024-05-01 18:23
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
|
|
13
|
+
from pyfaidx import Fasta
|
|
14
|
+
from rich.progress import track
|
|
15
|
+
|
|
16
|
+
logging.basicConfig(level=logging.INFO)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TranscriptExon:
    """Exon spans of one transcript, with the gene and location they belong to."""

    def __init__(self, chrom=None, strand=None, gene_id=None, gene_name=None):
        self.gene_id = gene_id
        self.gene_name = gene_name
        self.chrom = chrom
        self.strand = strand
        # exon key -> (start, end); calc_len and get_seq treat the pair as
        # 1-based and inclusive.
        self.span = {}
        # Ranking tuple; (10, 0) is also the fallback value read_gtf assigns.
        self.priority = (10, 0)

    def add_span(self, gene_id, gene_name, chrom, strand, exon_number, start, end):
        """Record one exon and refresh the gene/strand information.

        Raises:
            ValueError: if a chromosome is already set and differs from *chrom*.
        """
        self.gene_id = gene_id
        self.gene_name = gene_name
        if self.chrom is not None and self.chrom != chrom:
            raise ValueError("Chrom mismatch")
        self.chrom = chrom
        self.strand = strand
        # A repeated exon_number overwrites the earlier span.
        self.span[exon_number] = (start, end)

    def calc_len(self):
        """Return the summed length of all exons (inclusive coordinates)."""
        return sum(end - start + 1 for start, end in self.span.values())

    def get_seq(self, fasta):
        """Return the upper-cased transcript sequence assembled from *fasta*.

        Exons are concatenated in sorted key order; on the ``-`` strand each
        exon is reverse-complemented before being appended. *fasta* is indexed
        as ``fasta[chrom][start - 1:end]`` and the slice must expose ``.seq``
        and ``.reverse.complement``.
        """
        pieces = []
        for _, (start, end) in sorted(self.span.items()):
            exon = fasta[self.chrom][start - 1 : end]
            if self.strand == "-":
                exon = exon.reverse.complement
            pieces.append(exon.seq)
        # One join instead of repeated string concatenation.
        return "".join(pieces).upper()

    def __repr__(self):
        """Return ``gene_name<TAB>chrom<TAB>strand<TAB>start-end,...``.

        Spans are listed in insertion order (not sorted by exon key), and a
        missing gene name is written as ``.``.
        """
        span_str = ",".join(f"{start}-{end}" for start, end in self.span.values())
        gene_name = "." if self.gene_name is None else self.gene_name
        return f"{gene_name}\t{self.chrom}\t{self.strand}\t{span_str}"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def parse_gtf_annot(annot):
    """Parse a GTF attribute column into a ``{key: value}`` dict.

    Attributes are split on ``"; "``; each one is ``key value`` with the
    value's surrounding double quotes removed. A key that occurs several
    times gets its values joined with ``"; "``. Empty fragments are ignored,
    any run of whitespace may separate key and value, and a key with no
    value maps to ``""`` instead of raising.
    """
    grouped = defaultdict(list)
    for item in annot.rstrip("\n").rstrip(";").split("; "):
        item = item.strip()
        if not item:
            continue
        # split(None, 1) tolerates repeated spaces between key and value,
        # which would otherwise leave a stray leading quote in the value.
        key, *rest = item.split(None, 1)
        value = rest[0] if rest else ""
        grouped[key].append(value.strip('"'))

    return {key: "; ".join(values) for key, values in grouped.items()}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def parse_gff_annot(annot):
    """Parse a GFF attribute column (``key=value;key=value``) into a dict.

    Keys are assumed to be unique; if one repeats, the last value wins.
    Empty fragments are ignored and a fragment without ``=`` maps its text
    to ``""`` instead of raising.
    """
    parsed = {}
    for item in annot.rstrip("\n").rstrip(";").split(";"):
        item = item.strip()
        if not item:
            continue
        # partition splits on the first "=" only, so values may contain "=".
        key, _, value = item.partition("=")
        parsed[key] = value
    return parsed
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _transcript_priority(attrs):
    """Rank a transcript from its parsed attributes; smaller tuples sort first."""
    tags = attrs["tag"].split("; ") if "tag" in attrs else ()
    if "MANE_Select" in tags:
        return (0, 0)
    if "Ensembl_canonical" in tags:
        return (0, 1)
    if "basic" in tags:
        return (0, 2)
    # No recognised tag (or no tag at all): fall back to the transcript
    # support level. Only the first whitespace-separated token is examined,
    # and an empty or non-numeric value is treated as "no level".
    support = attrs.get("transcript_support_level", "").split()
    if support and support[0].isdigit():
        return (1, int(support[0]))
    return (10, 0)


def read_gtf(gtf_file, is_gff=False):
    """Collect exon records from an annotation file, grouped by gene and transcript.

    Lines starting with "#" are ignored, as are lines with fewer than nine
    tab-separated columns or whose third column is not "exon". Column nine is
    parsed into attributes, which must provide either
    ``gene_id``/``transcript_id`` (exon key taken from ``exon_number``, else
    ``exon``) or ``Parent``/``ID`` (``Parent`` is used as the gene key, and
    ``ID`` is split at its last "." into the transcript key and an exon key
    with any "exon" prefix removed). Records with neither pair are skipped.
    An exon key made only of digits is converted to ``int``.

    Each transcript's ``priority`` is reassigned on every exon line:
    ``(0, 0..2)`` for the MANE_Select / Ensembl_canonical / basic tags,
    ``(1, level)`` for a numeric ``transcript_support_level``, and ``(10, 0)``
    otherwise.

    Args:
        gtf_file: Path of the annotation file, opened in text mode.
        is_gff: Parse column nine with ``parse_gff_annot`` instead of
            ``parse_gtf_annot``.

    Returns:
        Nested ``defaultdict``: ``gene_id -> transcript_id -> TranscriptExon``.

    Raises:
        KeyError: a ``gene_id``/``transcript_id`` record has neither
            ``exon_number`` nor ``exon``.
        ValueError: a ``Parent``/``ID`` record whose ``ID`` contains no ".",
            a non-integer start/end column, or a chromosome mismatch reported
            by ``TranscriptExon.add_span``.
    """
    parse_annot = parse_gff_annot if is_gff else parse_gtf_annot
    gene_dict = defaultdict(lambda: defaultdict(TranscriptExon))

    with open(gtf_file, "r") as f:
        for line in track(f, description="Parsing GTF..."):
            if line.startswith("#"):
                continue
            fields = line.strip().split("\t")
            if len(fields) < 9 or fields[2] != "exon":
                continue
            d = parse_annot(fields[8])
            if "gene_id" in d and "transcript_id" in d:
                gene_id = d["gene_id"]
                transcript_id = d["transcript_id"]
                exon_id = d["exon_number"] if "exon_number" in d else d["exon"]
            elif "Parent" in d and "ID" in d:
                # NOTE(review): Parent is used directly as the gene key here;
                # confirm this matches the GFF flavours the tool targets.
                gene_id = d["Parent"]
                transcript_id, exon_id = d["ID"].rsplit(".", 1)
                exon_id = exon_id.removeprefix("exon")
            else:
                continue
            # If the exon id is all digits, convert it to an integer.
            if exon_id.isdigit():
                exon_id = int(exon_id)

            transcript = gene_dict[gene_id][transcript_id]
            transcript.priority = _transcript_priority(d)
            transcript.add_span(
                gene_id,
                d.get("gene_name"),
                fields[0],
                fields[6],
                exon_id,
                int(fields[3]),
                int(fields[4]),
            )
    return gene_dict
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def rank_exons(tx_id, exon_info):
    """Return the sort key used to rank the transcripts of one gene.

    Smaller tuples rank first. The first element is the level:

    * below 10: ``exon_info.priority`` is returned unchanged (level 0 for
      MANE_Select and similar tags, level 1 for transcript_support_level,
      where a smaller support level ranks higher);
    * 2: ``tx_id`` ends in ".<digits>" (e.g. Arabidopsis ".1", ".2");
      the smaller number ranks higher;
    * 3: ``tx_id`` ends in "-<digits>" (e.g. rice "-01", "-02");
      the smaller number ranks higher;
    * 4: everything else, ordered by length so that a longer transcript
      ranks higher.

    Args:
        tx_id: Transcript identifier.
        exon_info: Object exposing ``priority`` (a 2-tuple) and ``calc_len()``.

    Returns:
        A ``(level, rank)`` tuple of ints.
    """
    if exon_info.priority[0] < 10:
        return exon_info.priority
    # some plant sample such as Arabidopsis will have ".1", ".2", ... tag in the end of tx_id
    _, dot, version = tx_id.rpartition(".")
    if dot and version.isdigit():
        return (2, int(version))
    # some plant sample such as rice will have "-01" tag in the end of tx_id
    _, dash, isoform = tx_id.rpartition("-")
    if dash and isoform.isdigit():
        return (3, int(isoform))
    # Rank runs from 100 (length >= 100,000) up to 100,100 (length 0). The
    # length is clamped so very long transcripts get the best rank rather
    # than falling behind shorter ones.
    tx_len = exon_info.calc_len()
    return (4, 100_100 - min(tx_len, 100_000))
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def parse_file(gtf_file, fasta_file, output_file, seq_file):
    """Write the best-ranked transcript of every gene as a table row and a FASTA record.

    The annotation is read with ``read_gtf`` (GFF attribute parsing is used
    when the file name ends in "gff" or "gff3"). For each gene, the
    transcript with the smallest ``rank_exons`` key is chosen; the first one
    wins on ties. ``output_file`` receives a header line followed by one
    tab-separated row per gene, and ``seq_file`` receives the chosen
    transcript's sequence under a ``>gene_id`` header.
    """
    annotation_is_gff = gtf_file.endswith(("gff", "gff3"))
    gene_dict = read_gtf(gtf_file, is_gff=annotation_is_gff)
    fasta = Fasta(fasta_file, read_ahead=100_000)
    header = "gene_id\ttranscript_id\tgene_name\tchrom\tstrand\tspans\n"
    with open(output_file, "w") as table, open(seq_file, "w") as seqs:
        table.write(header)
        for gene_id, transcripts in track(
            gene_dict.items(), description="Fetching sequences..."
        ):
            tx_id, tx = min(
                transcripts.items(), key=lambda item: rank_exons(item[0], item[1])
            )
            table.write(f"{gene_id}\t{tx_id}\t{tx}\n")
            seqs.write(f">{gene_id}\n{tx.get_seq(fasta)}\n")
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
if __name__ == "__main__":
    import argparse

    # Command-line entry point: every option is required and is passed
    # straight through to parse_file.
    cli = argparse.ArgumentParser()
    for short_flag, long_flag, help_text in (
        ("-g", "--gtf-file", "GTF file"),
        ("-f", "--fasta-file", "Fasta file"),
        ("-o", "--output-file", "Output file"),
        ("-s", "--seq-file", "Output sequence file"),
    ):
        cli.add_argument(short_flag, long_flag, help=help_text, required=True)

    opts = cli.parse_args()
    parse_file(opts.gtf_file, opts.fasta_file, opts.output_file, opts.seq_file)
|