biopipen 0.32.3__py3-none-any.whl → 0.33.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (118)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +35 -23
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +428 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +20 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +94 -148
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  59. biopipen/scripts/scrna/MarkersFinder.R +273 -654
  60. biopipen/scripts/scrna/RadarPlots.R +73 -53
  61. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  62. biopipen/scripts/scrna/ScVelo.py +0 -0
  63. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -31
  64. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -54
  65. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -403
  66. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +32 -17
  67. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -239
  68. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  69. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -12
  70. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  71. biopipen/scripts/scrna/SlingShot.R +71 -0
  72. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  73. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  74. biopipen/scripts/snp/PlinkFilter.py +7 -7
  75. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  76. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  77. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  78. biopipen/scripts/stats/ChowTest.R +48 -22
  79. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  80. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  81. biopipen/scripts/tcr/ClonalStats.R +484 -0
  82. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  83. biopipen/scripts/tcr/TCRDock.py +10 -6
  84. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  85. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  86. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  87. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  88. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  89. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  90. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  91. biopipen/scripts/vcf/VcfAnno.py +11 -11
  92. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  93. biopipen/scripts/vcf/VcfFilter.py +5 -5
  94. biopipen/scripts/vcf/VcfFix.py +7 -7
  95. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  96. biopipen/scripts/vcf/VcfIndex.py +3 -3
  97. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  98. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  99. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  100. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  101. biopipen/scripts/web/Download.py +8 -4
  102. biopipen/scripts/web/DownloadList.py +5 -5
  103. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  104. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  105. biopipen/scripts/web/gcloud_common.py +1 -1
  106. biopipen/utils/gsea.R +75 -35
  107. biopipen/utils/misc.R +205 -7
  108. biopipen/utils/misc.py +17 -8
  109. biopipen/utils/reference.py +11 -11
  110. biopipen/utils/repr.R +146 -0
  111. biopipen/utils/vcf.py +1 -1
  112. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/METADATA +8 -8
  113. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/RECORD +115 -105
  114. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/WHEEL +1 -1
  115. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -144
  116. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -467
  117. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -204
  118. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,8 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, logger
3
+ import os
3
4
  import numpy as np
5
+ import pandas as pd
4
6
  import scanpy
5
7
  import liana
6
8
  import liana.method.sc._liana_pipe as _liana_pipe
@@ -21,52 +23,87 @@ def _trimean(a, axis=0):
21
23
  _liana_pipe._trimean = _trimean
22
24
 
23
25
 
24
- sobjfile = Path({{in.sobjfile | repr}}) # pyright: ignore # noqa: E999
25
- outfile = Path({{out.outfile | repr}}) # pyright: ignore
26
- envs = {{envs | repr}} # pyright: ignore
26
+ sobjfile = Path({{in.sobjfile | quote}}) # pyright: ignore # noqa: E999
27
+ outfile = Path({{out.outfile | quote}}) # pyright: ignore
28
+ envs: dict = {{envs | dict}} # pyright: ignore
27
29
 
30
+ # https://github.com/h5py/h5py/issues/1082#issuecomment-1311498466
31
+ os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
28
32
  method = envs.pop("method")
29
33
  assay = envs.pop("assay")
30
34
  ncores = envs.pop("ncores")
31
35
  species = envs.pop("species")
32
36
  rscript = envs.pop("rscript")
37
+ subset = envs.pop("subset")
38
+ subset_using = envs.pop("subset_using", "auto")
39
+ if subset_using == "auto":
40
+ subset_using = "python" if subset and "[" in subset else "r"
41
+ split_by = envs.pop("split_by")
33
42
 
34
43
  if sobjfile.suffix.lower() == ".rds" or sobjfile.suffix.lower() == ".h5seurat":
44
+ logger.info("Converting the Seurat object to h5ad ...")
45
+
35
46
  annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
36
- r_script_convert_to_anndata = f"""
37
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
38
- {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
39
-
40
- seurat_to_anndata(
41
- "{sobjfile}",
42
- "{annfile}",
43
- assay = {{ envs.assay | r }},
44
- log_info = log_info
45
- )
46
- """
47
+ if subset and subset_using == "r":
48
+ r_script_convert_to_anndata = (
49
+ "biopipen.utils::ConvertSeuratToAnnData"
50
+ f"({str(sobjfile)!r}, {str(annfile)!r}, "
51
+ f"assay = {{envs['assay'] | r}}, subset = {{envs['subset'] | r}})"
52
+ )
53
+ else:
54
+ r_script_convert_to_anndata = (
55
+ "biopipen.utils::ConvertSeuratToAnnData"
56
+ f"({str(sobjfile)!r}, {str(annfile)!r}, assay = {{envs['assay'] | r}})"
57
+ )
47
58
  run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
48
-
49
59
  sobjfile = annfile
60
+ elif subset and subset == "r":
61
+ raise ValueError(
62
+ "h5ad file is provided as input, ",
63
+ "'subset' can only be a 'python' expression (`envs.subset_using = 'python'`)."
64
+ )
50
65
 
51
66
  logger.info("Reading the h5ad file ...")
52
67
  adata = scanpy.read_h5ad(sobjfile)
53
68
 
69
+ if subset and subset_using == "python":
70
+ logger.info("Subsetting the data ...")
71
+ adata = adata[{{envs['subset']}}] # pyright: ignore
72
+
54
73
  method = method.lower()
55
74
  if method == "log2fc":
56
75
  method_fun = liana.mt.logfc
57
76
  else:
58
77
  method_fun = getattr(liana.mt, method)
59
78
 
60
- logger.info(f"Running {method} ...")
61
- envs["adata"] = adata
62
79
  envs["resource_name"] = "consensus" if species == "human" else "mouseconsensus"
63
80
  envs["n_jobs"] = ncores
64
81
  envs["inplace"] = True
65
82
  envs["verbose"] = True
66
83
  envs["key_added"] = "liana_ccc"
67
- method_fun(**envs)
68
84
 
69
- res = adata.uns['liana_ccc']
85
+ if split_by:
86
+ split_vals = adata.obs[split_by].unique()
87
+ result: pd.DataFrame = None # type: ignore
88
+ for split_val in split_vals:
89
+ logger.info(f"Running {method} for {split_by} = {split_val} ...")
90
+ adata_split = adata[adata.obs[split_by] == split_val]
91
+ envs["adata"] = adata_split
92
+
93
+ method_fun(**envs)
94
+ res = adata_split.uns['liana_ccc']
95
+ res[split_by] = split_val
96
+
97
+ if result is None:
98
+ result = res
99
+ else:
100
+ result = pd.concat([result, res], ignore_index=True)
101
+ else:
102
+ logger.info(f"Running {method} ...")
103
+ envs["adata"] = adata
104
+ method_fun(**envs)
105
+
106
+ result = adata.uns['liana_ccc']
70
107
 
71
108
  mag_score_names = {
72
109
  "cellphonedb": "lr_means",
@@ -93,9 +130,9 @@ spec_score_names = {
93
130
  }
94
131
 
95
132
  if mag_score_names[method] is not None:
96
- res['mag_score'] = res[mag_score_names[method]]
133
+ result['mag_score'] = result[mag_score_names[method]]
97
134
  if spec_score_names[method] is not None:
98
- res['spec_score'] = res[spec_score_names[method]]
135
+ result['spec_score'] = result[spec_score_names[method]]
99
136
 
100
137
  logger.info("Saving the result ...")
101
- res.to_csv(outfile, sep="\t", index=False)
138
+ result.to_csv(outfile, sep="\t", index=False)
@@ -0,0 +1,51 @@
1
+ library(loomR)
2
+ library(DropletUtils)
3
+ library(Matrix)
4
+
5
+ loomfile <- {{in.loomfile | r}}
6
+ outdir <- {{out.outdir | r}}
7
+
8
+ lfile <- connect(filename = loomfile, mode = "r")
9
+
10
+ # Extract the expression matrix (genes x cells)
11
+ expr_matrix <- t(lfile[["matrix"]][, ])
12
+ if (!inherits(expr_matrix, "dgCMatrix")) {
13
+ expr_matrix <- Matrix::Matrix(expr_matrix, sparse = TRUE)
14
+ }
15
+
16
+ # Extract gene names and IDs
17
+ gene_names <- lfile[["row_attrs/Gene"]][]
18
+
19
+ gene_ids <- tryCatch({
20
+ lfile[["row_attrs/GeneID"]][]
21
+ }, error = function(e) {
22
+ NULL
23
+ })
24
+
25
+ if (is.null(gene_ids)) {
26
+ gene_ids <- gene_names
27
+ }
28
+
29
+ # Extract cell barcodes
30
+ cell_barcodes <- lfile[["col_attrs/CellID"]][]
31
+
32
+ # Close the LOOM file connection
33
+ lfile$close_all()
34
+
35
+ # Create a data frame for gene information
36
+ gene_info <- data.frame(
37
+ gene_id = gene_ids,
38
+ gene_name = gene_names
39
+ )
40
+
41
+ # Write the data to 10X format
42
+
43
+ write10xCounts(
44
+ path = outdir,
45
+ x = expr_matrix,
46
+ gene.id = gene_info$gene_id,
47
+ gene.symbol = gene_info$gene_name,
48
+ barcodes = cell_barcodes,
49
+ version = "3",
50
+ overwrite = TRUE
51
+ )