biopipen-0.28.1-py3-none-any.whl → biopipen-0.29.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +8 -0
- biopipen/ns/bam.py +0 -2
- biopipen/ns/bed.py +35 -0
- biopipen/ns/cellranger_pipeline.py +5 -5
- biopipen/ns/cnv.py +18 -2
- biopipen/ns/cnvkit_pipeline.py +16 -11
- biopipen/ns/gene.py +68 -23
- biopipen/ns/misc.py +2 -15
- biopipen/ns/plot.py +204 -0
- biopipen/ns/regulatory.py +214 -0
- biopipen/ns/scrna.py +31 -5
- biopipen/ns/snp.py +516 -8
- biopipen/ns/stats.py +167 -3
- biopipen/ns/vcf.py +196 -0
- biopipen/reports/snp/PlinkCallRate.svelte +24 -0
- biopipen/reports/snp/PlinkFreq.svelte +18 -0
- biopipen/reports/snp/PlinkHWE.svelte +18 -0
- biopipen/reports/snp/PlinkHet.svelte +18 -0
- biopipen/reports/snp/PlinkIBD.svelte +18 -0
- biopipen/scripts/bam/CNVpytor.py +144 -46
- biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
- biopipen/scripts/bed/BedtoolsMerge.py +1 -1
- biopipen/scripts/cnv/AneuploidyScore.R +30 -7
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
- biopipen/scripts/cnv/TMADScore.R +21 -5
- biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
- biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
- biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
- biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
- biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
- biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
- biopipen/scripts/delim/SampleInfo.R +10 -5
- biopipen/scripts/gene/GeneNameConversion.R +65 -0
- biopipen/scripts/gene/GenePromoters.R +61 -0
- biopipen/scripts/misc/Shell.sh +15 -0
- biopipen/scripts/plot/Manhattan.R +146 -0
- biopipen/scripts/plot/QQPlot.R +146 -0
- biopipen/scripts/regulatory/MotifAffinityTest.R +226 -0
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +126 -0
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +96 -0
- biopipen/scripts/regulatory/MotifScan.py +159 -0
- biopipen/scripts/regulatory/atSNP.R +33 -0
- biopipen/scripts/regulatory/motifBreakR.R +1594 -0
- biopipen/scripts/scrna/MarkersFinder.R +69 -67
- biopipen/scripts/scrna/SeuratClustering.R +71 -29
- biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
- biopipen/scripts/scrna/SeuratPreparing.R +252 -122
- biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
- biopipen/scripts/snp/MatrixEQTL.R +85 -44
- biopipen/scripts/snp/Plink2GTMat.py +133 -0
- biopipen/scripts/snp/PlinkCallRate.R +190 -0
- biopipen/scripts/snp/PlinkFilter.py +100 -0
- biopipen/scripts/snp/PlinkFreq.R +298 -0
- biopipen/scripts/snp/PlinkFromVcf.py +78 -0
- biopipen/scripts/snp/PlinkHWE.R +80 -0
- biopipen/scripts/snp/PlinkHet.R +92 -0
- biopipen/scripts/snp/PlinkIBD.R +200 -0
- biopipen/scripts/snp/PlinkUpdateName.py +124 -0
- biopipen/scripts/stats/Mediation.R +94 -0
- biopipen/scripts/stats/MetaPvalue.R +2 -1
- biopipen/scripts/stats/MetaPvalue1.R +70 -0
- biopipen/scripts/tcr/TCRClusterStats.R +12 -7
- biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
- biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
- biopipen/scripts/vcf/BcftoolsSort.py +113 -0
- biopipen/scripts/vcf/BcftoolsView.py +73 -0
- biopipen/scripts/vcf/VcfFix_utils.py +1 -1
- biopipen/scripts/vcf/bcftools_utils.py +52 -0
- biopipen/utils/gene.R +83 -37
- biopipen/utils/gene.py +108 -60
- biopipen/utils/misc.R +56 -0
- biopipen/utils/misc.py +5 -2
- biopipen/utils/reference.py +54 -10
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/METADATA +2 -2
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/RECORD +80 -51
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
- biopipen/ns/bcftools.py +0 -111
- biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
- biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
- biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
- biopipen/scripts/gene/GeneNameConversion.py +0 -66
- {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0
|
biopipen/scripts/bcftools/BcftoolsFilter.py (file removed)
@@ -1,79 +0,0 @@
|
|
|
1
|
-
import shutil
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from hashlib import md5
|
|
4
|
-
|
|
5
|
-
from biopipen.core.filters import dict_to_cli_args, run_command
|
|
6
|
-
|
|
7
|
-
infile = {{in.infile | repr}} # pyright: ignore
|
|
8
|
-
outfile = {{out.outfile | repr}} # pyright: ignore
|
|
9
|
-
bcftools = {{envs.bcftools | repr}} # pyright: ignore
|
|
10
|
-
keep = {{envs.keep | repr}} # pyright: ignore
|
|
11
|
-
args = {{envs.args | repr}} # pyright: ignore
|
|
12
|
-
ncores = {{envs.ncores | repr}} # pyright: ignore
|
|
13
|
-
tmpdir = {{envs.tmpdir | repr}} # pyright: ignore
|
|
14
|
-
includes = {{envs.includes | repr}} # pyright: ignore
|
|
15
|
-
excludes = {{envs.excludes | repr}} # pyright: ignore
|
|
16
|
-
|
|
17
|
-
args[""] = bcftools
|
|
18
|
-
args["_"] = infile
|
|
19
|
-
args["o"] = outfile
|
|
20
|
-
args["threads"] = ncores
|
|
21
|
-
if "O" not in args and "output-type" not in args:
|
|
22
|
-
args["O"] = "z" if infile.endswith(".gz") else "v"
|
|
23
|
-
if "m" not in args and "mode" not in args:
|
|
24
|
-
args["m"] = "+"
|
|
25
|
-
|
|
26
|
-
tmpdir = (
|
|
27
|
-
Path(tmpdir) / f"biopipen-bcftoolsfilter-{md5(infile.encode()).hexdigest()}"
|
|
28
|
-
)
|
|
29
|
-
tmpdir.mkdir(parents=True, exist_ok=True)
|
|
30
|
-
# a.vcf.gz -> a
|
|
31
|
-
# a.vcf -> a
|
|
32
|
-
stem = Path(infile).stem
|
|
33
|
-
if stem.endswith(".vcf"):
|
|
34
|
-
stem = stem[:-4]
|
|
35
|
-
# .vcf.gz
|
|
36
|
-
# .gz
|
|
37
|
-
ext = Path(infile).name[len(stem):]
|
|
38
|
-
|
|
39
|
-
FILTER_INDEX = [1]
|
|
40
|
-
|
|
41
|
-
def handle_filter(vcf, fname, filt, flag):
|
|
42
|
-
print("- Handling filter ", fname, ": ", filt, " ...")
|
|
43
|
-
|
|
44
|
-
arguments = args.copy()
|
|
45
|
-
arguments[flag] = filt
|
|
46
|
-
arguments["_"] = vcf
|
|
47
|
-
arguments["o"] = tmpdir / f"{stem}.{fname}{ext}"
|
|
48
|
-
if keep:
|
|
49
|
-
arguments["s"] = fname
|
|
50
|
-
|
|
51
|
-
run_command(dict_to_cli_args(arguments, dashify=True), fg=True)
|
|
52
|
-
return arguments["o"]
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def normalize_expr(expr, flag):
|
|
56
|
-
out = {}
|
|
57
|
-
if not expr:
|
|
58
|
-
return out
|
|
59
|
-
if isinstance(expr, list):
|
|
60
|
-
for ex in expr:
|
|
61
|
-
out[f"FILTER{FILTER_INDEX[0]}"] = (ex, flag)
|
|
62
|
-
FILTER_INDEX[0] += 1
|
|
63
|
-
elif isinstance(expr, dict):
|
|
64
|
-
for name, ex in expr.items():
|
|
65
|
-
out[name] = (ex, flag)
|
|
66
|
-
else: # str
|
|
67
|
-
out[f"FILTER{FILTER_INDEX[0]}"] = (expr, flag)
|
|
68
|
-
FILTER_INDEX[0] += 1
|
|
69
|
-
return out
|
|
70
|
-
|
|
71
|
-
includes = normalize_expr(includes, "include")
|
|
72
|
-
excludes = normalize_expr(excludes, "exclude")
|
|
73
|
-
includes.update(excludes)
|
|
74
|
-
|
|
75
|
-
# bcftools can be only done once at one filter
|
|
76
|
-
for fname, (filt, flag) in includes.items():
|
|
77
|
-
infile = handle_filter(infile, fname, filt, flag)
|
|
78
|
-
|
|
79
|
-
shutil.copy2(infile, outfile)
|
|
biopipen/scripts/bcftools/BcftoolsSort.py (file removed)
@@ -1,19 +0,0 @@
|
|
|
1
|
-
from biopipen.utils.misc import run_command, dict_to_cli_args
|
|
2
|
-
|
|
3
|
-
infile = {{in.infile | quote}} # pyright: ignore
|
|
4
|
-
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
5
|
-
bcftools = {{envs.bcftools | quote}} # pyright: ignore
|
|
6
|
-
gz = {{envs.gz | repr}} # pyright: ignore
|
|
7
|
-
args = {{envs.args | repr}} # pyright: ignore
|
|
8
|
-
tmpdir = {{envs.tmpdir | quote}} # pyright: ignore
|
|
9
|
-
index = {{envs.index | repr}} # pyright: ignore
|
|
10
|
-
|
|
11
|
-
args[""] = bcftools
|
|
12
|
-
args["_"] = infile
|
|
13
|
-
args["o"] = outfile
|
|
14
|
-
args["O"] = "z" if gz or index else "v"
|
|
15
|
-
|
|
16
|
-
run_command(dict_to_cli_args(args, dashify=True), fg=True)
|
|
17
|
-
|
|
18
|
-
if index:
|
|
19
|
-
run_command([bcftools, "index", outfile], fg=True)
|
|
biopipen/scripts/gene/GeneNameConversion.py (file removed)
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import pandas
|
|
2
|
-
from datar.all import c, right_join, select, relocate
|
|
3
|
-
from biopipen.utils.gene import gene_name_conversion
|
|
4
|
-
|
|
5
|
-
infile = {{in.infile | quote}} # pyright: ignore
|
|
6
|
-
outfile = {{out.outfile | quote}} # pyright: ignore
|
|
7
|
-
inopts = {{envs.inopts | repr}} # pyright: ignore
|
|
8
|
-
outopts = {{envs.outopts | repr}} # pyright: ignore
|
|
9
|
-
notfound = {{envs.notfound | repr}} # pyright: ignore
|
|
10
|
-
genecol = {{envs.genecol | repr}} # pyright: ignore
|
|
11
|
-
output = {{envs.output | repr}} # pyright: ignore
|
|
12
|
-
infmt = {{envs.infmt | repr}} # pyright: ignore
|
|
13
|
-
outfmt = {{envs.outfmt | repr}} # pyright: ignore
|
|
14
|
-
species = {{envs.species | quote}} # pyright: ignore
|
|
15
|
-
|
|
16
|
-
df = pandas.read_csv(infile, **inopts)
|
|
17
|
-
|
|
18
|
-
if isinstance(genecol, int):
|
|
19
|
-
genes = df.iloc[:, genecol]
|
|
20
|
-
else:
|
|
21
|
-
genes = df.loc[:, genecol]
|
|
22
|
-
|
|
23
|
-
colname = genes.name
|
|
24
|
-
genes = genes.tolist()
|
|
25
|
-
|
|
26
|
-
# query `outfmt`
|
|
27
|
-
# <object> <object>
|
|
28
|
-
# 0 1255_g_at GUCA1A
|
|
29
|
-
# 1 1316_at THRA
|
|
30
|
-
# 2 1320_at PTPN21
|
|
31
|
-
# 3 1294_at MIR5193
|
|
32
|
-
converted = gene_name_conversion(
|
|
33
|
-
genes=genes,
|
|
34
|
-
species=species,
|
|
35
|
-
infmt=infmt,
|
|
36
|
-
outfmt=outfmt,
|
|
37
|
-
notfound=notfound,
|
|
38
|
-
)
|
|
39
|
-
converted.columns = [colname] + converted.columns[1:].tolist()
|
|
40
|
-
|
|
41
|
-
if output == "only":
|
|
42
|
-
out = converted
|
|
43
|
-
|
|
44
|
-
elif output == "keep":
|
|
45
|
-
out = df >> right_join(converted, by=colname, suffix=["", "_converted"])
|
|
46
|
-
|
|
47
|
-
elif output == "drop":
|
|
48
|
-
out = df >> right_join(
|
|
49
|
-
converted,
|
|
50
|
-
by=colname, suffix=["", "_converted"]
|
|
51
|
-
) >> select(~c(colname))
|
|
52
|
-
|
|
53
|
-
elif output == "replace":
|
|
54
|
-
out = df >> right_join(
|
|
55
|
-
converted, by=colname, suffix=["", "_converted"]
|
|
56
|
-
)
|
|
57
|
-
converted_cols = out.columns[-len(converted.columns)+1:].tolist()
|
|
58
|
-
pos = df.columns.get_indexer([colname])[0]
|
|
59
|
-
out = out >> relocate(
|
|
60
|
-
converted_cols, _after=pos+1
|
|
61
|
-
) >> select(~c(colname))
|
|
62
|
-
|
|
63
|
-
else:
|
|
64
|
-
raise ValueError(f"Unknown output mode: {output}.")
|
|
65
|
-
|
|
66
|
-
out.to_csv(outfile, **outopts)
|
|
{biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/WHEEL: File without changes
|