biopipen 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. See the original diff page for more details.

Files changed (134)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +77 -26
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +411 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +22 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +85 -139
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/CellsDistribution.R +31 -6
  59. biopipen/scripts/scrna/MarkersFinder.R +272 -602
  60. biopipen/scripts/scrna/MetaMarkers.R +16 -7
  61. biopipen/scripts/scrna/RadarPlots.R +75 -35
  62. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  63. biopipen/scripts/scrna/ScVelo.py +0 -0
  64. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -25
  65. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -47
  66. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -385
  67. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +33 -13
  68. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -228
  69. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  70. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -6
  71. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  72. biopipen/scripts/scrna/SlingShot.R +71 -0
  73. biopipen/scripts/scrna/TopExpressingGenes.R +9 -7
  74. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  75. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  76. biopipen/scripts/snp/PlinkFilter.py +7 -7
  77. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  78. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  79. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  80. biopipen/scripts/stats/ChowTest.R +48 -22
  81. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  82. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  83. biopipen/scripts/tcr/CDR3AAPhyschem.R +12 -2
  84. biopipen/scripts/tcr/ClonalStats.R +484 -0
  85. biopipen/scripts/tcr/CloneResidency.R +23 -5
  86. biopipen/scripts/tcr/Immunarch-basic.R +8 -1
  87. biopipen/scripts/tcr/Immunarch-clonality.R +5 -0
  88. biopipen/scripts/tcr/Immunarch-diversity.R +25 -4
  89. biopipen/scripts/tcr/Immunarch-geneusage.R +15 -1
  90. biopipen/scripts/tcr/Immunarch-kmer.R +14 -1
  91. biopipen/scripts/tcr/Immunarch-overlap.R +15 -1
  92. biopipen/scripts/tcr/Immunarch-spectratyping.R +10 -1
  93. biopipen/scripts/tcr/Immunarch-tracking.R +6 -0
  94. biopipen/scripts/tcr/Immunarch-vjjunc.R +33 -0
  95. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  96. biopipen/scripts/tcr/TCRClusterStats.R +24 -7
  97. biopipen/scripts/tcr/TCRDock.py +10 -6
  98. biopipen/scripts/tcr/TESSA.R +6 -1
  99. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  100. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  101. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  102. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  103. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  104. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  105. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  106. biopipen/scripts/vcf/VcfAnno.py +11 -11
  107. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  108. biopipen/scripts/vcf/VcfFilter.py +5 -5
  109. biopipen/scripts/vcf/VcfFix.py +7 -7
  110. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  111. biopipen/scripts/vcf/VcfIndex.py +3 -3
  112. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  113. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  114. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  115. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  116. biopipen/scripts/web/Download.py +8 -4
  117. biopipen/scripts/web/DownloadList.py +5 -5
  118. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  119. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  120. biopipen/scripts/web/gcloud_common.py +1 -1
  121. biopipen/utils/gsea.R +96 -42
  122. biopipen/utils/misc.R +205 -7
  123. biopipen/utils/misc.py +17 -8
  124. biopipen/utils/plot.R +53 -17
  125. biopipen/utils/reference.py +11 -11
  126. biopipen/utils/repr.R +146 -0
  127. biopipen/utils/vcf.py +1 -1
  128. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/METADATA +9 -9
  129. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/RECORD +131 -122
  130. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/WHEEL +1 -1
  131. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -139
  132. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -452
  133. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -201
  134. {biopipen-0.32.1.dist-info → biopipen-0.33.0.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,8 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, logger
3
+ import os
3
4
  import numpy as np
5
+ import pandas as pd
4
6
  import scanpy
5
7
  import liana
6
8
  import liana.method.sc._liana_pipe as _liana_pipe
@@ -21,52 +23,87 @@ def _trimean(a, axis=0):
21
23
  _liana_pipe._trimean = _trimean
22
24
 
23
25
 
24
- sobjfile = Path({{in.sobjfile | repr}}) # pyright: ignore # noqa: E999
25
- outfile = Path({{out.outfile | repr}}) # pyright: ignore
26
- envs = {{envs | repr}} # pyright: ignore
26
+ sobjfile = Path({{in.sobjfile | quote}}) # pyright: ignore # noqa: E999
27
+ outfile = Path({{out.outfile | quote}}) # pyright: ignore
28
+ envs: dict = {{envs | dict}} # pyright: ignore
27
29
 
30
+ # https://github.com/h5py/h5py/issues/1082#issuecomment-1311498466
31
+ os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
28
32
  method = envs.pop("method")
29
33
  assay = envs.pop("assay")
30
34
  ncores = envs.pop("ncores")
31
35
  species = envs.pop("species")
32
36
  rscript = envs.pop("rscript")
37
+ subset = envs.pop("subset")
38
+ subset_using = envs.pop("subset_using", "auto")
39
+ if subset_using == "auto":
40
+ subset_using = "python" if subset and "[" in subset else "r"
41
+ split_by = envs.pop("split_by")
33
42
 
34
43
  if sobjfile.suffix.lower() == ".rds" or sobjfile.suffix.lower() == ".h5seurat":
44
+ logger.info("Converting the Seurat object to h5ad ...")
45
+
35
46
  annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
36
- r_script_convert_to_anndata = f"""
37
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
38
- {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
39
-
40
- seurat_to_anndata(
41
- "{sobjfile}",
42
- "{annfile}",
43
- assay = {{ envs.assay | r }},
44
- log_info = log_info
45
- )
46
- """
47
+ if subset and subset_using == "r":
48
+ r_script_convert_to_anndata = (
49
+ "biopipen.utils::ConvertSeuratToAnnData"
50
+ f"({str(sobjfile)!r}, {str(annfile)!r}, "
51
+ f"assay = {{envs['assay'] | r}}, subset = {{envs['subset'] | r}})"
52
+ )
53
+ else:
54
+ r_script_convert_to_anndata = (
55
+ "biopipen.utils::ConvertSeuratToAnnData"
56
+ f"({str(sobjfile)!r}, {str(annfile)!r}, assay = {{envs['assay'] | r}})"
57
+ )
47
58
  run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
48
-
49
59
  sobjfile = annfile
60
+ elif subset and subset == "r":
61
+ raise ValueError(
62
+ "h5ad file is provided as input, ",
63
+ "'subset' can only be a 'python' expression (`envs.subset_using = 'python'`)."
64
+ )
50
65
 
51
66
  logger.info("Reading the h5ad file ...")
52
67
  adata = scanpy.read_h5ad(sobjfile)
53
68
 
69
+ if subset and subset_using == "python":
70
+ logger.info("Subsetting the data ...")
71
+ adata = adata[{{envs['subset']}}] # pyright: ignore
72
+
54
73
  method = method.lower()
55
74
  if method == "log2fc":
56
75
  method_fun = liana.mt.logfc
57
76
  else:
58
77
  method_fun = getattr(liana.mt, method)
59
78
 
60
- logger.info(f"Running {method} ...")
61
- envs["adata"] = adata
62
79
  envs["resource_name"] = "consensus" if species == "human" else "mouseconsensus"
63
80
  envs["n_jobs"] = ncores
64
81
  envs["inplace"] = True
65
82
  envs["verbose"] = True
66
83
  envs["key_added"] = "liana_ccc"
67
- method_fun(**envs)
68
84
 
69
- res = adata.uns['liana_ccc']
85
+ if split_by:
86
+ split_vals = adata.obs[split_by].unique()
87
+ result: pd.DataFrame = None # type: ignore
88
+ for split_val in split_vals:
89
+ logger.info(f"Running {method} for {split_by} = {split_val} ...")
90
+ adata_split = adata[adata.obs[split_by] == split_val]
91
+ envs["adata"] = adata_split
92
+
93
+ method_fun(**envs)
94
+ res = adata_split.uns['liana_ccc']
95
+ res[split_by] = split_val
96
+
97
+ if result is None:
98
+ result = res
99
+ else:
100
+ result = pd.concat([result, res], ignore_index=True)
101
+ else:
102
+ logger.info(f"Running {method} ...")
103
+ envs["adata"] = adata
104
+ method_fun(**envs)
105
+
106
+ result = adata.uns['liana_ccc']
70
107
 
71
108
  mag_score_names = {
72
109
  "cellphonedb": "lr_means",
@@ -93,9 +130,9 @@ spec_score_names = {
93
130
  }
94
131
 
95
132
  if mag_score_names[method] is not None:
96
- res['mag_score'] = res[mag_score_names[method]]
133
+ result['mag_score'] = result[mag_score_names[method]]
97
134
  if spec_score_names[method] is not None:
98
- res['spec_score'] = res[spec_score_names[method]]
135
+ result['spec_score'] = result[spec_score_names[method]]
99
136
 
100
137
  logger.info("Saving the result ...")
101
- res.to_csv(outfile, sep="\t", index=False)
138
+ result.to_csv(outfile, sep="\t", index=False)
@@ -368,9 +368,17 @@ do_case <- function(name, case) {
368
368
  width <- devpars$width %||% (400 + 120 + 100 * ngroups)
369
369
  # group_by names
370
370
  height <- devpars$height %||% (120 + 100 * cells_rows)
371
+
372
+ p <- wrap_plots(piecharts, ncol = 1, guides = "collect")
373
+
371
374
  piefile <- file.path(info$casedir, paste0(info$case_slug, ".png"))
372
375
  png(piefile, res = res, width = width, height = height)
373
- print(wrap_plots(piecharts, ncol = 1, guides = "collect"))
376
+ print(p)
377
+ dev.off()
378
+
379
+ piefile_pdf <- file.path(info$casedir, paste0(info$case_slug, ".pdf"))
380
+ pdf(piefile_pdf, width = width / res, height = height / res)
381
+ print(p)
374
382
  dev.off()
375
383
 
376
384
  log_info(" Plotting and saving heatmap ...")
@@ -411,7 +419,6 @@ do_case <- function(name, case) {
411
419
  hm_res <- hm_devpars$res %||% 100
412
420
  hm_width <- hm_devpars$width %||% (600 + 15 * length(unique(meta$seurat_clusters)) + extra_width)
413
421
  hm_height <- hm_devpars$height %||% (450 + 15 * cells_rows + extra_height)
414
- png(hmfile, res = hm_res, width = hm_width, height = hm_height)
415
422
  hm <- Heatmap(
416
423
  as.matrix(hmdata),
417
424
  name = "Size",
@@ -430,6 +437,12 @@ do_case <- function(name, case) {
430
437
  right_annotation = row_ha,
431
438
  top_annotation = ha
432
439
  )
440
+ png(hmfile, res = hm_res, width = hm_width, height = hm_height)
441
+ print(hm)
442
+ dev.off()
443
+
444
+ hmfile_pdf <- gsub(".png$", ".pdf", hmfile)
445
+ pdf(hmfile_pdf, width = hm_width / hm_res, height = hm_height / hm_res)
433
446
  print(hm)
434
447
  dev.off()
435
448
 
@@ -454,11 +467,11 @@ do_case <- function(name, case) {
454
467
  add_report(
455
468
  list(
456
469
  name = "Pie Charts",
457
- contents = list(list(kind = "image", src = piefile))
470
+ contents = list(list(kind = "image", src = piefile, download = piefile_pdf))
458
471
  ),
459
472
  list(
460
473
  name = "Heatmap",
461
- contents = list(list(src = hmfile, kind = "image"))
474
+ contents = list(list(src = hmfile, kind = "image", download = hmfile_pdf))
462
475
  ),
463
476
  list(
464
477
  name = "Distribution Table",
@@ -493,25 +506,37 @@ do_overlap <- function(section) {
493
506
  print(venn_p)
494
507
  dev.off()
495
508
 
509
+ venn_plot_pdf <- gsub(".png$", ".pdf", venn_plot)
510
+ pdf(venn_plot_pdf, width = 10, height = 6)
511
+ print(venn_p)
512
+ dev.off()
513
+
496
514
  upset_plot <- file.path(sec_dir, "upset.png")
497
515
  upset_p <- upset(fromList(overlap_cases))
498
516
  png(upset_plot, res = 100, width = 800, height = 600)
499
517
  print(upset_p)
500
518
  dev.off()
501
519
 
520
+ upset_plot_pdf <- gsub(".png$", ".pdf", upset_plot)
521
+ pdf(upset_plot_pdf, width = 8, height = 6)
522
+ print(upset_p)
523
+ dev.off()
524
+
502
525
  add_report(
503
526
  list(
504
527
  name = "Venn Plot",
505
528
  contents = list(list(
506
529
  kind = "image",
507
- src = venn_plot
530
+ src = venn_plot,
531
+ download = venn_plot_pdf
508
532
  ))
509
533
  ),
510
534
  list(
511
535
  name = "UpSet Plot",
512
536
  contents = list(list(
513
537
  kind = "image",
514
- src = upset_plot
538
+ src = upset_plot,
539
+ download = upset_plot_pdf
515
540
  ))
516
541
  ),
517
542
  h1 = "Overlapping Groups",