biopipen-0.32.3-py3-none-any.whl → biopipen-0.33.0-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (117)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +35 -23
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +411 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +20 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +85 -148
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/MarkersFinder.R +273 -654
  59. biopipen/scripts/scrna/RadarPlots.R +73 -53
  60. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  61. biopipen/scripts/scrna/ScVelo.py +0 -0
  62. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -31
  63. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -54
  64. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -403
  65. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +32 -17
  66. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -239
  67. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  68. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -12
  69. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  70. biopipen/scripts/scrna/SlingShot.R +71 -0
  71. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  72. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  73. biopipen/scripts/snp/PlinkFilter.py +7 -7
  74. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  75. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  76. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  77. biopipen/scripts/stats/ChowTest.R +48 -22
  78. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  79. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  80. biopipen/scripts/tcr/ClonalStats.R +484 -0
  81. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  82. biopipen/scripts/tcr/TCRDock.py +10 -6
  83. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  84. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  85. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  86. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  87. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  88. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  89. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  90. biopipen/scripts/vcf/VcfAnno.py +11 -11
  91. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  92. biopipen/scripts/vcf/VcfFilter.py +5 -5
  93. biopipen/scripts/vcf/VcfFix.py +7 -7
  94. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  95. biopipen/scripts/vcf/VcfIndex.py +3 -3
  96. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  97. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  98. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  99. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  100. biopipen/scripts/web/Download.py +8 -4
  101. biopipen/scripts/web/DownloadList.py +5 -5
  102. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  103. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  104. biopipen/scripts/web/gcloud_common.py +1 -1
  105. biopipen/utils/gsea.R +75 -35
  106. biopipen/utils/misc.R +205 -7
  107. biopipen/utils/misc.py +17 -8
  108. biopipen/utils/reference.py +11 -11
  109. biopipen/utils/repr.R +146 -0
  110. biopipen/utils/vcf.py +1 -1
  111. {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/METADATA +8 -8
  112. {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/RECORD +114 -105
  113. {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
  114. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -144
  115. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -467
  116. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -204
  117. {biopipen-0.32.3.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
biopipen/scripts/scrna/CellCellCommunication.py +59 -22
@@ -1,6 +1,8 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, logger
3
+ import os
3
4
  import numpy as np
5
+ import pandas as pd
4
6
  import scanpy
5
7
  import liana
6
8
  import liana.method.sc._liana_pipe as _liana_pipe
@@ -21,52 +23,87 @@ def _trimean(a, axis=0):
21
23
  _liana_pipe._trimean = _trimean
22
24
 
23
25
 
24
- sobjfile = Path({{in.sobjfile | repr}}) # pyright: ignore # noqa: E999
25
- outfile = Path({{out.outfile | repr}}) # pyright: ignore
26
- envs = {{envs | repr}} # pyright: ignore
26
+ sobjfile = Path({{in.sobjfile | quote}}) # pyright: ignore # noqa: E999
27
+ outfile = Path({{out.outfile | quote}}) # pyright: ignore
28
+ envs: dict = {{envs | dict}} # pyright: ignore
27
29
 
30
+ # https://github.com/h5py/h5py/issues/1082#issuecomment-1311498466
31
+ os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
28
32
  method = envs.pop("method")
29
33
  assay = envs.pop("assay")
30
34
  ncores = envs.pop("ncores")
31
35
  species = envs.pop("species")
32
36
  rscript = envs.pop("rscript")
37
+ subset = envs.pop("subset")
38
+ subset_using = envs.pop("subset_using", "auto")
39
+ if subset_using == "auto":
40
+ subset_using = "python" if subset and "[" in subset else "r"
41
+ split_by = envs.pop("split_by")
33
42
 
34
43
  if sobjfile.suffix.lower() == ".rds" or sobjfile.suffix.lower() == ".h5seurat":
44
+ logger.info("Converting the Seurat object to h5ad ...")
45
+
35
46
  annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
36
- r_script_convert_to_anndata = f"""
37
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
38
- {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
39
-
40
- seurat_to_anndata(
41
- "{sobjfile}",
42
- "{annfile}",
43
- assay = {{ envs.assay | r }},
44
- log_info = log_info
45
- )
46
- """
47
+ if subset and subset_using == "r":
48
+ r_script_convert_to_anndata = (
49
+ "biopipen.utils::ConvertSeuratToAnnData"
50
+ f"({str(sobjfile)!r}, {str(annfile)!r}, "
51
+ f"assay = {{envs['assay'] | r}}, subset = {{envs['subset'] | r}})"
52
+ )
53
+ else:
54
+ r_script_convert_to_anndata = (
55
+ "biopipen.utils::ConvertSeuratToAnnData"
56
+ f"({str(sobjfile)!r}, {str(annfile)!r}, assay = {{envs['assay'] | r}})"
57
+ )
47
58
  run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
48
-
49
59
  sobjfile = annfile
60
+ elif subset and subset == "r":
61
+ raise ValueError(
62
+ "h5ad file is provided as input, ",
63
+ "'subset' can only be a 'python' expression (`envs.subset_using = 'python'`)."
64
+ )
50
65
 
51
66
  logger.info("Reading the h5ad file ...")
52
67
  adata = scanpy.read_h5ad(sobjfile)
53
68
 
69
+ if subset and subset_using == "python":
70
+ logger.info("Subsetting the data ...")
71
+ adata = adata[{{envs['subset']}}] # pyright: ignore
72
+
54
73
  method = method.lower()
55
74
  if method == "log2fc":
56
75
  method_fun = liana.mt.logfc
57
76
  else:
58
77
  method_fun = getattr(liana.mt, method)
59
78
 
60
- logger.info(f"Running {method} ...")
61
- envs["adata"] = adata
62
79
  envs["resource_name"] = "consensus" if species == "human" else "mouseconsensus"
63
80
  envs["n_jobs"] = ncores
64
81
  envs["inplace"] = True
65
82
  envs["verbose"] = True
66
83
  envs["key_added"] = "liana_ccc"
67
- method_fun(**envs)
68
84
 
69
- res = adata.uns['liana_ccc']
85
+ if split_by:
86
+ split_vals = adata.obs[split_by].unique()
87
+ result: pd.DataFrame = None # type: ignore
88
+ for split_val in split_vals:
89
+ logger.info(f"Running {method} for {split_by} = {split_val} ...")
90
+ adata_split = adata[adata.obs[split_by] == split_val]
91
+ envs["adata"] = adata_split
92
+
93
+ method_fun(**envs)
94
+ res = adata_split.uns['liana_ccc']
95
+ res[split_by] = split_val
96
+
97
+ if result is None:
98
+ result = res
99
+ else:
100
+ result = pd.concat([result, res], ignore_index=True)
101
+ else:
102
+ logger.info(f"Running {method} ...")
103
+ envs["adata"] = adata
104
+ method_fun(**envs)
105
+
106
+ result = adata.uns['liana_ccc']
70
107
 
71
108
  mag_score_names = {
72
109
  "cellphonedb": "lr_means",
@@ -93,9 +130,9 @@ spec_score_names = {
93
130
  }
94
131
 
95
132
  if mag_score_names[method] is not None:
96
- res['mag_score'] = res[mag_score_names[method]]
133
+ result['mag_score'] = result[mag_score_names[method]]
97
134
  if spec_score_names[method] is not None:
98
- res['spec_score'] = res[spec_score_names[method]]
135
+ result['spec_score'] = result[spec_score_names[method]]
99
136
 
100
137
  logger.info("Saving the result ...")
101
- res.to_csv(outfile, sep="\t", index=False)
138
+ result.to_csv(outfile, sep="\t", index=False)