biopipen-0.34.7-py3-none-any.whl → biopipen-0.34.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.34.7"
1
+ __version__ = "0.34.8"
biopipen/ns/scrna.py CHANGED
@@ -2706,6 +2706,7 @@ class PseudoBulkDEG(Proc):
2706
2706
  analysis.
2707
2707
 
2708
2708
  Envs:
2709
+ ncores (type=int): Number of cores to use for parallelization.
2709
2710
  mutaters (type=json): Mutaters to mutate the metadata of the
2710
2711
  seurat object. Keys are the new column names and values are the
2711
2712
  expressions to mutate the columns. These new columns can be
@@ -2715,6 +2716,9 @@ class PseudoBulkDEG(Proc):
2715
2716
  each: The column name in metadata to separate the cells into different cases.
2716
2717
  When specified, the case will be expanded to multiple cases for
2717
2718
  each value in the column.
2719
+ cache (type=auto): Where to cache the results.
2720
+ If `True`, cache to `outdir` of the job. If `False`, don't cache.
2721
+ Otherwise, specify the directory to cache to.
2718
2722
  subset: An expression in string to subset the cells.
2719
2723
  aggregate_by: The column names in metadata to aggregate the cells.
2720
2724
  layer: The layer to pull and aggregate the data.
@@ -2844,7 +2848,9 @@ class PseudoBulkDEG(Proc):
2844
2848
  lang = config.lang.rscript
2845
2849
  script = "file://../scripts/scrna/PseudoBulkDEG.R"
2846
2850
  envs = {
2851
+ "ncores": config.misc.ncores,
2847
2852
  "mutaters": {},
2853
+ "cache": config.path.tmpdir,
2848
2854
  "each": None,
2849
2855
  "subset": None,
2850
2856
  "aggregate_by": None,
@@ -268,20 +268,22 @@ process_markers <- function(markers, info, case) {
268
268
  ui = "tabs"
269
269
  )
270
270
 
271
- for (plotname in names(case$marker_plots)) {
272
- plotargs <- case$marker_plots[[plotname]]
273
- plotargs$degs <- markers
274
- rownames(plotargs$degs) <- make.unique(markers$gene)
275
- plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
276
- do_call(VizDEGs, plotargs)
277
- reporter$add2(
278
- list(
279
- name = plotname,
280
- contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
281
- hs = c(info$section, info$name),
282
- hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
283
- ui = "tabs"
284
- )
271
+ if (nrow(markers) > 0) {
272
+ for (plotname in names(case$marker_plots)) {
273
+ plotargs <- case$marker_plots[[plotname]]
274
+ plotargs$degs <- markers
275
+ rownames(plotargs$degs) <- make.unique(markers$gene)
276
+ plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
277
+ do_call(VizDEGs, plotargs)
278
+ reporter$add2(
279
+ list(
280
+ name = plotname,
281
+ contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
282
+ hs = c(info$section, info$name),
283
+ hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
284
+ ui = "tabs"
285
+ )
286
+ }
285
287
  }
286
288
 
287
289
  # Do enrichment analysis
@@ -399,6 +401,10 @@ process_allmarkers <- function(markers, plotcases, casename, groupname) {
399
401
  plotargs <- plotcases[[plotname]]
400
402
  plotargs$degs <- markers
401
403
  plotargs$outprefix <- file.path(info$prefix, slugify(plotname))
404
+ if (identical(plotargs$plot_type, "heatmap")) {
405
+ plotargs$show_row_names = plotargs$show_row_names %||% TRUE
406
+ plotargs$show_column_names = plotargs$show_column_names %||% TRUE
407
+ }
402
408
  do_call(VizDEGs, plotargs)
403
409
  reporter$add2(
404
410
  list(
@@ -547,7 +553,9 @@ run_case <- function(name) {
547
553
  attr(markers, "group_by") <- each
548
554
  attr(markers, "ident_1") <- NULL
549
555
  attr(markers, "ident_2") <- NULL
550
- process_allmarkers(markers, allmarker_plots, name, each)
556
+ if (!is.null(markers) && nrow(markers) > 0) {
557
+ process_allmarkers(markers, allmarker_plots, name, each)
558
+ }
551
559
  }
552
560
 
553
561
  if (length(overlaps) > 0) {
@@ -557,7 +565,7 @@ run_case <- function(name) {
557
565
 
558
566
  }
559
567
 
560
- if (!is.null(enriches)) {
568
+ if (!is.null(enriches) && length(enriches) > 0) {
561
569
  log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
562
570
  if (!is.data.frame(enriches)) {
563
571
  each_levels <- names(enriches)
@@ -573,7 +581,7 @@ run_case <- function(name) {
573
581
  enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
574
582
  }
575
583
 
576
- if (length(allenrich_plots) > 0) {
584
+ if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
577
585
  log$info("- Visualizing all enrichments together ...")
578
586
  process_allenriches(enriches, allenrich_plots, name, each)
579
587
  }
@@ -636,7 +644,9 @@ run_case <- function(name) {
636
644
  ))
637
645
 
638
646
  if (!is.null(original_case) && !is.null(cases[[original_case]])) {
639
- markers[[each_name]] <- each
647
+ if (nrow(markers) > 0) {
648
+ markers[[each_name]] <- each
649
+ }
640
650
  cases[[original_case]]$markers[[each]] <<- markers
641
651
  cases[[original_case]]$enriches[[each]] <<- enrich
642
652
  }
@@ -8,7 +8,9 @@ outdir <- {{out.outdir | r}}
8
8
  joboutdir <- {{job.outdir | r}}
9
9
  each <- {{envs.each | r}}
10
10
  subset <- {{envs.subset | r}}
11
+ ncores <- {{envs.ncores | r}}
11
12
  mutaters <- {{envs.mutaters | r}}
13
+ cache <- {{ envs.cache | r }}
12
14
  aggregate_by <- {{envs.aggregate_by | r}}
13
15
  layer <- {{envs.layer | r}}
14
16
  assay <- {{envs.assay | r}}
@@ -35,6 +37,7 @@ overlaps <- {{ envs.overlaps | r }}
35
37
  cases <- {{envs.cases | r}}
36
38
 
37
39
  aggregate_by <- unique(c(aggregate_by, group_by, paired_by, each))
40
+ if (isTRUE(cache)) { cache <- joboutdir }
38
41
 
39
42
  log <- get_logger()
40
43
  reporter <- get_reporter()
@@ -74,10 +77,12 @@ defaults <- list(
74
77
  ident_1 = ident_1,
75
78
  ident_2 = ident_2,
76
79
  dbs = dbs,
80
+ ncores = ncores,
77
81
  sigmarkers = sigmarkers,
78
82
  enrich_style = enrich_style,
79
83
  paired_by = paired_by,
80
84
  tool = tool,
85
+ cache = cache,
81
86
  allmarker_plots_defaults = allmarker_plots_defaults,
82
87
  allmarker_plots = allmarker_plots,
83
88
  allenrich_plots_defaults = allenrich_plots_defaults,
@@ -181,6 +186,7 @@ expand_each <- function(name, case) {
181
186
  if (length(case$overlaps) > 0 || length(case$allmarker_plots) > 0 || length(case$allenrich_plots) > 0) {
182
187
  ovcase <- case
183
188
 
189
+ ovcase$allexprs <- list()
184
190
  ovcase$markers <- list()
185
191
  ovcase$allmarker_plots <- lapply(
186
192
  ovcase$allmarker_plots,
@@ -533,18 +539,21 @@ run_case <- function(name) {
533
539
  "dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
534
540
  "overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style",
535
541
  "aggregate_by", "subset", "layer", "assay", "group_by", "ident_1", "ident_2", "original_subset",
536
- "paired_by", "tool", "error",
542
+ "paired_by", "tool", "error", "ncores", "cache", "allexprs",
537
543
  allow_nonexisting = TRUE
538
544
  )
539
545
 
540
546
  if (!is.null(markers) || !is.null(enriches)) {
541
- if (!is.null(markers)) { # It is the overlap/allmarker case
542
- log$info("- Summarizing DEGs in subcases (by each: {each}) ...")
547
+ if (!is.null(markers) && length(markers) > 0) {
548
+ log$info("Summarizing DEGs in subcases (by each: {each}) ...")
543
549
  # handle the overlaps / allmarkers analysis here
544
550
  if (!is.data.frame(markers)) {
545
551
  each_levels <- names(markers)
546
552
  markers <- do_call(rbind, lapply(each_levels, function(x) {
547
553
  markers_df <- markers[[x]]
554
+ if (is.null(markers_df) || nrow(markers_df) == 0) {
555
+ return(NULL)
556
+ }
548
557
  if (nrow(markers_df) > 0) {
549
558
  markers_df[[each]] <- x
550
559
  } else {
@@ -556,17 +565,17 @@ run_case <- function(name) {
556
565
  }
557
566
  # gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, diff_pct, <each>
558
567
 
568
+ if (!is.data.frame(allexprs)) {
569
+ meta <- do_call(rbind, lapply(allexprs, attr, "meta"))
570
+ allexprs <- do_call(cbind, allexprs)
571
+ } else {
572
+ meta <- attr(allexprs, "meta")
573
+ }
574
+
559
575
  if (length(allmarker_plots) > 0) {
560
- log$info("- Visualizing all DEGs together ...")
561
- exprs <- AggregateExpressionPseudobulk(
562
- srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
563
- subset = original_subset, log = log
564
- )
565
- attr(markers, "object") <- AggregateExpressionPseudobulk(
566
- srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
567
- subset = original_subset, log = log
568
- )
569
- attr(markers, "meta") <- attr(exprs, "meta")
576
+ log$info("Visualizing all DEGs together ...")
577
+ attr(markers, "object") <- allexprs
578
+ attr(markers, "meta") <- meta
570
579
  attr(markers, "group_by") <- each
571
580
  attr(markers, "paired_by") <- paired_by
572
581
  attr(markers, "ident_1") <- NULL
@@ -575,18 +584,21 @@ run_case <- function(name) {
575
584
  }
576
585
 
577
586
  if (length(overlaps) > 0) {
578
- log$info("- Visualizing overlaps between subcases ...")
587
+ log$info("Visualizing overlaps between subcases ...")
579
588
  process_overlaps(markers, overlaps, name, each)
580
589
  }
581
590
 
582
591
  }
583
592
 
584
- if (!is.null(enriches)) {
585
- log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
593
+ if (!is.null(enriches) && length(enriches) > 0) {
594
+ log$info("Summarizing enrichments in subcases (by each: {each}) ...")
586
595
  if (!is.data.frame(enriches)) {
587
596
  each_levels <- names(enriches)
588
597
  enriches <- do_call(rbind, lapply(each_levels, function(x) {
589
598
  enrich_df <- enriches[[x]]
599
+ if (is.null(enrich_df) || nrow(enrich_df) == 0) {
600
+ return(NULL)
601
+ }
590
602
  if (nrow(enrich_df) > 0) {
591
603
  enrich_df[[each]] <- x
592
604
  } else {
@@ -594,11 +606,13 @@ run_case <- function(name) {
594
606
  }
595
607
  enrich_df
596
608
  }))
597
- enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
609
+ if (!is.null(enriches) && nrow(enriches) > 0) {
610
+ enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
611
+ }
598
612
  }
599
613
 
600
- if (length(allenrich_plots) > 0) {
601
- log$info("- Visualizing all enrichments together ...")
614
+ if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
615
+ log$info("Visualizing all enrichments together ...")
602
616
  process_allenriches(enriches, allenrich_plots, name, each)
603
617
  }
604
618
  }
@@ -615,7 +629,8 @@ run_case <- function(name) {
615
629
  {
616
630
  RunDEGAnalysis(
617
631
  exprs, group_by = group_by, ident_1 = ident_1, ident_2 = ident_2,
618
- paired_by = paired_by, tool = tool, log = log
632
+ paired_by = paired_by, tool = tool, log = log, ncores = ncores,
633
+ cache = cache
619
634
  )
620
635
  }, error = function(e) {
621
636
  if (error) {
@@ -646,9 +661,12 @@ run_case <- function(name) {
646
661
  ))
647
662
 
648
663
  if (!is.null(original_case) && !is.null(cases[[original_case]])) {
649
- markers[[each_name]] <- each
664
+ if (!is.null(markers)) {
665
+ markers[[each_name]] <- each
666
+ }
650
667
  cases[[original_case]]$markers[[each]] <<- markers
651
668
  cases[[original_case]]$enriches[[each]] <<- enrich
669
+ cases[[original_case]]$allexprs[[each]] <<- exprs
652
670
  }
653
671
 
654
672
  invisible()
@@ -82,13 +82,13 @@ expand_each <- function(name, case) {
82
82
  }
83
83
 
84
84
  if (length(cases) == 0 && name == "GSEA") {
85
- name <- case$each
85
+ prefix <- case$each
86
86
  } else {
87
- name <- paste0(name, " (", case$each, ")")
87
+ prefix <- paste0(name, " (", case$each, ")")
88
88
  }
89
89
 
90
90
  for (each in eachs) {
91
- newname <- paste0(name, "::", each)
91
+ newname <- paste0(prefix, "::", each)
92
92
  newcase <- case
93
93
 
94
94
  newcase$original_case <- paste0(name, " (all ", case$each,")")
@@ -144,6 +144,11 @@ do_case <- function(name) {
144
144
 
145
145
  if (!is.null(case$gseas)) {
146
146
 
147
+ if (length(case$gseas) == 0) {
148
+ log$warn(" No GSEA results found for case {name}. Skipping.")
149
+ return(invisible(NULL))
150
+ }
151
+
147
152
  each_levels <- names(case$gseas)
148
153
  gseas <- do_call(rbind, lapply(each_levels, function(x) {
149
154
  gsea_df <- case$gseas[[x]]
@@ -242,25 +247,16 @@ do_case <- function(name) {
242
247
  quote = FALSE
243
248
  )
244
249
  if (all(is.na(ranks))) {
245
- if (length(allclasses) < 100) {
246
- log$warn(" Ignoring this case because all gene ranks are NA and there are <100 cells.")
247
- reporter$add2(
248
- list(
249
- kind = "error",
250
- content = paste0("Not enough cells (n = ", length(allclasses), ") to run fgsea.")
251
- ),
252
- hs = c(info$section, info$name)
253
- )
254
- return(NULL)
255
- } else {
256
- stop(paste0(
257
- "All gene ranks are NA (# cells = ",
258
- length(allclasses),
259
- "). ",
260
- "It's probably due to high missing rate in the data. ",
261
- "You may want to try a different `envs$method` for pre-ranking."
262
- ))
263
- }
250
+ log$warn(" All gene ranks are NA. It's probably due to high missing rate in the data.")
251
+ log$warn(" Case ignored, you may also try a different ranking method.")
252
+ reporter$add2(
253
+ list(
254
+ kind = "error",
255
+ content = "All gene ranks are NA. It's probably due to high missing rate in the data."
256
+ ),
257
+ hs = c(info$section, info$name)
258
+ )
259
+ return(invisible(NULL))
264
260
  }
265
261
 
266
262
  # run fgsea
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: biopipen
3
- Version: 0.34.7
3
+ Version: 0.34.8
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -1,4 +1,4 @@
1
- biopipen/__init__.py,sha256=vVRUKRt0zUNKxfGQQE5WrQiVWQ-bg4UrgyEGX7LclcA,23
1
+ biopipen/__init__.py,sha256=R2kKZIbRrNhrmdllokG5_J0gtqktdwFV00CiImolksE,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=lZV_vbYWk6uqm19ZWJcsZCcSNqAdIfN2fOfamzxZpg4,2148
@@ -22,7 +22,7 @@ biopipen/ns/plot.py,sha256=N41_izb6zi-XArUly5WhLebapNXbTNSgGlOCCwtrDlY,18282
22
22
  biopipen/ns/protein.py,sha256=YJtlKoHI2p5yHdxKeQnNtm5QrbxDGOq1UXOdt_7tlTs,6391
23
23
  biopipen/ns/regulatory.py,sha256=gJjGVpJrdv-rg2t5UjK4AGuvtLNymaNYNvoD8PhlbvE,15929
24
24
  biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
25
- biopipen/ns/scrna.py,sha256=ELhCbY2Vu8qHmDHlrI32gyaOxDO2ugFLz4WIV9kARfQ,144750
25
+ biopipen/ns/scrna.py,sha256=4dqgsj1cQGFWsJbrbdVF6ElaELmLBg76RlqqGmsW1iA,145087
26
26
  biopipen/ns/scrna_metabolic_landscape.py,sha256=EwLMrsj_pTqvyAgtHLoishjQxCg_j8n5OofuTofUph0,22096
27
27
  biopipen/ns/snp.py,sha256=iXWrw7Lmhf4_ct57HGT7JGTClCXUD4sZ2FzOgsC2pTg,28123
28
28
  biopipen/ns/stats.py,sha256=DlPyK5Vsg6ZEkV9SDS3aAw21eXzvOHgqeZDkXPhg7go,20509
@@ -155,13 +155,13 @@ biopipen/scripts/scrna/ExprImputation-rmagic.R,sha256=ePgbMZ_3bKbeUrjsMdkdtBM_MS
155
155
  biopipen/scripts/scrna/ExprImputation-scimpute.R,sha256=MI_bYfvCDKJsuGntUxfx_-NdrssBoQgL95-DGwJVE5s,1191
156
156
  biopipen/scripts/scrna/ExprImputation.R,sha256=GcdZJpkDpq88hRQjtLZY5-byp8V43stEFm5T-pQbU6A,319
157
157
  biopipen/scripts/scrna/LoomTo10X.R,sha256=c6F0p1udsL5UOlb84-53K5BsjSDWkdFyYTt5NQmlIec,1059
158
- biopipen/scripts/scrna/MarkersFinder.R,sha256=A-YCJ2WogU2QR8PqVn71lXCP63Vq1sMyAAIhqZYYawg,24278
158
+ biopipen/scripts/scrna/MarkersFinder.R,sha256=-W0rwS4IlR6DsY9gzGBgBN3aYD_7Tyseg2e8mxM6xA0,24796
159
159
  biopipen/scripts/scrna/MetaMarkers.R,sha256=BgYaWYEj6obwqaZaDWqNPtxb1IEEAnXAeBE0Ji9PvBA,12426
160
160
  biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=-tByCPk7i070LynAb0z2ANeRxr1QqiKP0dfrJm52jH4,4198
161
- biopipen/scripts/scrna/PseudoBulkDEG.R,sha256=Y5OuVCaIIppBqMxxXM3HpJQk5kA42wSgbBBIC1Rr1s0,24608
161
+ biopipen/scripts/scrna/PseudoBulkDEG.R,sha256=IuM4hl-KHZ5aaaTqZeylw4b1ZenMZaY4qobD5qxAlHs,25199
162
162
  biopipen/scripts/scrna/RadarPlots.R,sha256=Kn1E-hpczuujpgNjR8MqeIIVN-S3PbpmfcKWGKcNCVY,14546
163
163
  biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
164
- biopipen/scripts/scrna/ScFGSEA.R,sha256=EyRbsH5d1daIxtOHjYz24Udmv1PhV0nUC9HqEtzRnpE,11584
164
+ biopipen/scripts/scrna/ScFGSEA.R,sha256=Q3_fmVy1OGan_EHo6EHgoHa6Zgfl_i0wUv_KrwammCo,11440
165
165
  biopipen/scripts/scrna/ScSimulation.R,sha256=q0-dXD9px1cApc_TxGmR-OdNHE8W1VSVWfSI57B96bo,1697
166
166
  biopipen/scripts/scrna/ScVelo.py,sha256=SPUZFgZW1Zhw-bnjJo98RK0vpuNFODQ8Q3eTguNc84k,21359
167
167
  biopipen/scripts/scrna/Seurat2AnnData.R,sha256=F8g5n2CqX4-KBggxd8ittz8TejYuqqNLMudAHdFt1QM,184
@@ -284,7 +284,7 @@ biopipen/utils/misc.py,sha256=pDZ-INWVNqHuXYvcjmu8KqNAigkh2lsHy0BxX44CPvc,4048
284
284
  biopipen/utils/reference.py,sha256=Oc6IlA1giLxymAuI7DO-IQLHQ7-DbsWzOQE86oTDfMU,5955
285
285
  biopipen/utils/reporter.py,sha256=VwLl6xyVDWnGY7NEXyqBlkW8expKJoNQ5iTyZSELf5c,4922
286
286
  biopipen/utils/vcf.py,sha256=MmMbAtLUcKPp02jUdk9TzuET2gWSeoWn7xgoOXFysK0,9393
287
- biopipen-0.34.7.dist-info/METADATA,sha256=uR3Q2oygeFSoT96Lp0wQGCcigCJGaIyzbYzdJ2wlWVw,975
288
- biopipen-0.34.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
289
- biopipen-0.34.7.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
290
- biopipen-0.34.7.dist-info/RECORD,,
287
+ biopipen-0.34.8.dist-info/METADATA,sha256=uEU4Tv61Ui1QMIK5aUfQJIu4beg4mYyWwhfZ2IQ9M04,975
288
+ biopipen-0.34.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
289
+ biopipen-0.34.8.dist-info/entry_points.txt,sha256=BYqHGBQJxyFDNLYqgH64ycI5PYwnlqwYcCFsMvJgzAU,653
290
+ biopipen-0.34.8.dist-info/RECORD,,