biopipen 0.28.1__py3-none-any.whl → 0.29.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (85)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +8 -0
  3. biopipen/ns/bam.py +0 -2
  4. biopipen/ns/bed.py +35 -0
  5. biopipen/ns/cellranger_pipeline.py +5 -5
  6. biopipen/ns/cnv.py +18 -2
  7. biopipen/ns/cnvkit_pipeline.py +16 -11
  8. biopipen/ns/gene.py +68 -23
  9. biopipen/ns/misc.py +2 -15
  10. biopipen/ns/plot.py +204 -0
  11. biopipen/ns/regulatory.py +214 -0
  12. biopipen/ns/scrna.py +31 -5
  13. biopipen/ns/snp.py +516 -8
  14. biopipen/ns/stats.py +167 -3
  15. biopipen/ns/vcf.py +196 -0
  16. biopipen/reports/snp/PlinkCallRate.svelte +24 -0
  17. biopipen/reports/snp/PlinkFreq.svelte +18 -0
  18. biopipen/reports/snp/PlinkHWE.svelte +18 -0
  19. biopipen/reports/snp/PlinkHet.svelte +18 -0
  20. biopipen/reports/snp/PlinkIBD.svelte +18 -0
  21. biopipen/scripts/bam/CNVpytor.py +144 -46
  22. biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
  23. biopipen/scripts/bed/BedtoolsMerge.py +1 -1
  24. biopipen/scripts/cnv/AneuploidyScore.R +30 -7
  25. biopipen/scripts/cnv/AneuploidyScoreSummary.R +5 -2
  26. biopipen/scripts/cnv/TMADScore.R +21 -5
  27. biopipen/scripts/cnv/TMADScoreSummary.R +6 -2
  28. biopipen/scripts/cnvkit/CNVkitAccess.py +2 -1
  29. biopipen/scripts/cnvkit/CNVkitAutobin.py +3 -2
  30. biopipen/scripts/cnvkit/CNVkitBatch.py +1 -1
  31. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -1
  32. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +1 -1
  33. biopipen/scripts/cnvkit/CNVkitHeatmap.py +1 -1
  34. biopipen/scripts/cnvkit/CNVkitReference.py +2 -1
  35. biopipen/scripts/delim/SampleInfo.R +10 -5
  36. biopipen/scripts/gene/GeneNameConversion.R +65 -0
  37. biopipen/scripts/gene/GenePromoters.R +61 -0
  38. biopipen/scripts/misc/Shell.sh +15 -0
  39. biopipen/scripts/plot/Manhattan.R +146 -0
  40. biopipen/scripts/plot/QQPlot.R +146 -0
  41. biopipen/scripts/regulatory/MotifAffinityTest.R +226 -0
  42. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +126 -0
  43. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +96 -0
  44. biopipen/scripts/regulatory/MotifScan.py +159 -0
  45. biopipen/scripts/regulatory/atSNP.R +33 -0
  46. biopipen/scripts/regulatory/motifBreakR.R +1594 -0
  47. biopipen/scripts/scrna/MarkersFinder.R +69 -67
  48. biopipen/scripts/scrna/SeuratClustering.R +71 -29
  49. biopipen/scripts/scrna/SeuratMap2Ref.R +20 -0
  50. biopipen/scripts/scrna/SeuratPreparing.R +252 -122
  51. biopipen/scripts/scrna/SeuratSubClustering.R +76 -27
  52. biopipen/scripts/snp/MatrixEQTL.R +85 -44
  53. biopipen/scripts/snp/Plink2GTMat.py +133 -0
  54. biopipen/scripts/snp/PlinkCallRate.R +190 -0
  55. biopipen/scripts/snp/PlinkFilter.py +100 -0
  56. biopipen/scripts/snp/PlinkFreq.R +298 -0
  57. biopipen/scripts/snp/PlinkFromVcf.py +78 -0
  58. biopipen/scripts/snp/PlinkHWE.R +80 -0
  59. biopipen/scripts/snp/PlinkHet.R +92 -0
  60. biopipen/scripts/snp/PlinkIBD.R +200 -0
  61. biopipen/scripts/snp/PlinkUpdateName.py +124 -0
  62. biopipen/scripts/stats/Mediation.R +94 -0
  63. biopipen/scripts/stats/MetaPvalue.R +2 -1
  64. biopipen/scripts/stats/MetaPvalue1.R +70 -0
  65. biopipen/scripts/tcr/TCRClusterStats.R +12 -7
  66. biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
  67. biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
  68. biopipen/scripts/vcf/BcftoolsSort.py +113 -0
  69. biopipen/scripts/vcf/BcftoolsView.py +73 -0
  70. biopipen/scripts/vcf/VcfFix_utils.py +1 -1
  71. biopipen/scripts/vcf/bcftools_utils.py +52 -0
  72. biopipen/utils/gene.R +83 -37
  73. biopipen/utils/gene.py +108 -60
  74. biopipen/utils/misc.R +56 -0
  75. biopipen/utils/misc.py +5 -2
  76. biopipen/utils/reference.py +54 -10
  77. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/METADATA +2 -2
  78. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/RECORD +80 -51
  79. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/entry_points.txt +1 -1
  80. biopipen/ns/bcftools.py +0 -111
  81. biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
  82. biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
  83. biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
  84. biopipen/scripts/gene/GeneNameConversion.py +0 -66
  85. {biopipen-0.28.1.dist-info → biopipen-0.29.1.dist-info}/WHEEL +0 -0
@@ -1,79 +0,0 @@
1
- import shutil
2
- from pathlib import Path
3
- from hashlib import md5
4
-
5
- from biopipen.core.filters import dict_to_cli_args, run_command
6
-
7
- infile = {{in.infile | repr}} # pyright: ignore
8
- outfile = {{out.outfile | repr}} # pyright: ignore
9
- bcftools = {{envs.bcftools | repr}} # pyright: ignore
10
- keep = {{envs.keep | repr}} # pyright: ignore
11
- args = {{envs.args | repr}} # pyright: ignore
12
- ncores = {{envs.ncores | repr}} # pyright: ignore
13
- tmpdir = {{envs.tmpdir | repr}} # pyright: ignore
14
- includes = {{envs.includes | repr}} # pyright: ignore
15
- excludes = {{envs.excludes | repr}} # pyright: ignore
16
-
17
- args[""] = bcftools
18
- args["_"] = infile
19
- args["o"] = outfile
20
- args["threads"] = ncores
21
- if "O" not in args and "output-type" not in args:
22
- args["O"] = "z" if infile.endswith(".gz") else "v"
23
- if "m" not in args and "mode" not in args:
24
- args["m"] = "+"
25
-
26
- tmpdir = (
27
- Path(tmpdir) / f"biopipen-bcftoolsfilter-{md5(infile.encode()).hexdigest()}"
28
- )
29
- tmpdir.mkdir(parents=True, exist_ok=True)
30
- # a.vcf.gz -> a
31
- # a.vcf -> a
32
- stem = Path(infile).stem
33
- if stem.endswith(".vcf"):
34
- stem = stem[:-4]
35
- # .vcf.gz
36
- # .gz
37
- ext = Path(infile).name[len(stem):]
38
-
39
- FILTER_INDEX = [1]
40
-
41
- def handle_filter(vcf, fname, filt, flag):
42
- print("- Handling filter ", fname, ": ", filt, " ...")
43
-
44
- arguments = args.copy()
45
- arguments[flag] = filt
46
- arguments["_"] = vcf
47
- arguments["o"] = tmpdir / f"{stem}.{fname}{ext}"
48
- if keep:
49
- arguments["s"] = fname
50
-
51
- run_command(dict_to_cli_args(arguments, dashify=True), fg=True)
52
- return arguments["o"]
53
-
54
-
55
- def normalize_expr(expr, flag):
56
- out = {}
57
- if not expr:
58
- return out
59
- if isinstance(expr, list):
60
- for ex in expr:
61
- out[f"FILTER{FILTER_INDEX[0]}"] = (ex, flag)
62
- FILTER_INDEX[0] += 1
63
- elif isinstance(expr, dict):
64
- for name, ex in expr.items():
65
- out[name] = (ex, flag)
66
- else: # str
67
- out[f"FILTER{FILTER_INDEX[0]}"] = (expr, flag)
68
- FILTER_INDEX[0] += 1
69
- return out
70
-
71
- includes = normalize_expr(includes, "include")
72
- excludes = normalize_expr(excludes, "exclude")
73
- includes.update(excludes)
74
-
75
- # bcftools can be only done once at one filter
76
- for fname, (filt, flag) in includes.items():
77
- infile = handle_filter(infile, fname, filt, flag)
78
-
79
- shutil.copy2(infile, outfile)
@@ -1,19 +0,0 @@
1
- from biopipen.utils.misc import run_command, dict_to_cli_args
2
-
3
- infile = {{in.infile | quote}} # pyright: ignore
4
- outfile = {{out.outfile | quote}} # pyright: ignore
5
- bcftools = {{envs.bcftools | quote}} # pyright: ignore
6
- gz = {{envs.gz | repr}} # pyright: ignore
7
- args = {{envs.args | repr}} # pyright: ignore
8
- tmpdir = {{envs.tmpdir | quote}} # pyright: ignore
9
- index = {{envs.index | repr}} # pyright: ignore
10
-
11
- args[""] = bcftools
12
- args["_"] = infile
13
- args["o"] = outfile
14
- args["O"] = "z" if gz or index else "v"
15
-
16
- run_command(dict_to_cli_args(args, dashify=True), fg=True)
17
-
18
- if index:
19
- run_command([bcftools, "index", outfile], fg=True)
@@ -1,66 +0,0 @@
1
- import pandas
2
- from datar.all import c, right_join, select, relocate
3
- from biopipen.utils.gene import gene_name_conversion
4
-
5
- infile = {{in.infile | quote}} # pyright: ignore
6
- outfile = {{out.outfile | quote}} # pyright: ignore
7
- inopts = {{envs.inopts | repr}} # pyright: ignore
8
- outopts = {{envs.outopts | repr}} # pyright: ignore
9
- notfound = {{envs.notfound | repr}} # pyright: ignore
10
- genecol = {{envs.genecol | repr}} # pyright: ignore
11
- output = {{envs.output | repr}} # pyright: ignore
12
- infmt = {{envs.infmt | repr}} # pyright: ignore
13
- outfmt = {{envs.outfmt | repr}} # pyright: ignore
14
- species = {{envs.species | quote}} # pyright: ignore
15
-
16
- df = pandas.read_csv(infile, **inopts)
17
-
18
- if isinstance(genecol, int):
19
- genes = df.iloc[:, genecol]
20
- else:
21
- genes = df.loc[:, genecol]
22
-
23
- colname = genes.name
24
- genes = genes.tolist()
25
-
26
- # query `outfmt`
27
- # <object> <object>
28
- # 0 1255_g_at GUCA1A
29
- # 1 1316_at THRA
30
- # 2 1320_at PTPN21
31
- # 3 1294_at MIR5193
32
- converted = gene_name_conversion(
33
- genes=genes,
34
- species=species,
35
- infmt=infmt,
36
- outfmt=outfmt,
37
- notfound=notfound,
38
- )
39
- converted.columns = [colname] + converted.columns[1:].tolist()
40
-
41
- if output == "only":
42
- out = converted
43
-
44
- elif output == "keep":
45
- out = df >> right_join(converted, by=colname, suffix=["", "_converted"])
46
-
47
- elif output == "drop":
48
- out = df >> right_join(
49
- converted,
50
- by=colname, suffix=["", "_converted"]
51
- ) >> select(~c(colname))
52
-
53
- elif output == "replace":
54
- out = df >> right_join(
55
- converted, by=colname, suffix=["", "_converted"]
56
- )
57
- converted_cols = out.columns[-len(converted.columns)+1:].tolist()
58
- pos = df.columns.get_indexer([colname])[0]
59
- out = out >> relocate(
60
- converted_cols, _after=pos+1
61
- ) >> select(~c(colname))
62
-
63
- else:
64
- raise ValueError(f"Unknown output mode: {output}.")
65
-
66
- out.to_csv(outfile, **outopts)