biopipen 0.27.6__py3-none-any.whl → 0.27.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.27.6"
1
+ __version__ = "0.27.8"
biopipen/core/filters.py CHANGED
@@ -235,8 +235,8 @@ def _render_fgsea(
235
235
  with Path(cont["dir"]).joinpath("fgsea.txt").open() as f:
236
236
  next(f) # skip header
237
237
  for line in f:
238
- pathway, _ = line.split("\t", 1)
239
- pathways.append(pathway)
238
+ items = line.strip().split("\t")
239
+ pathways.append((items[0], items[-1]))
240
240
  if len(pathways) >= n_pathways:
241
241
  break
242
242
 
@@ -263,6 +263,7 @@ def _render_fgsea(
263
263
  {
264
264
  "kind": "table",
265
265
  "src": str(Path(cont["dir"]).joinpath("fgsea.txt")),
266
+ "data": {"excluded": {"slug"}},
266
267
  }
267
268
  ],
268
269
  },
@@ -274,10 +275,10 @@ def _render_fgsea(
274
275
  "ui": "table_of_images",
275
276
  "contents": [
276
277
  {
277
- "src": str(Path(cont["dir"]) / f"fgsea_{pw.replace('/', '-')}.png"),
278
+ "src": str(Path(cont["dir"]) / f"fgsea_{slug}.png"),
278
279
  "title": pw,
279
280
  }
280
- for pw in pathways
281
+ for pw, slug in pathways
281
282
  ]
282
283
  },
283
284
  ]
biopipen/ns/scrna.py CHANGED
@@ -1241,7 +1241,7 @@ class TopExpressingGenes(Proc):
1241
1241
  }
1242
1242
 
1243
1243
 
1244
- class ExprImpution(Proc):
1244
+ class ExprImputation(Proc):
1245
1245
  """This process imputes the dropout values in scRNA-seq data.
1246
1246
 
1247
1247
  It takes the Seurat object as input and outputs the Seurat object with
@@ -1317,13 +1317,13 @@ class ExprImpution(Proc):
1317
1317
  },
1318
1318
  "alra_args": {},
1319
1319
  }
1320
- script = "file://../scripts/scrna/ExprImpution.R"
1320
+ script = "file://../scripts/scrna/ExprImputation.R"
1321
1321
 
1322
1322
 
1323
1323
  class SCImpute(Proc):
1324
1324
  """Impute the dropout values in scRNA-seq data.
1325
1325
 
1326
- Deprecated. Use `ExprImpution` instead.
1326
+ Deprecated. Use `ExprImputation` instead.
1327
1327
 
1328
1328
  Input:
1329
1329
  infile: The input file for imputation
@@ -1769,13 +1769,18 @@ class SeuratMap2Ref(Proc):
1769
1769
  sobjfile: The seurat object
1770
1770
 
1771
1771
  Output:
1772
- outfile: The rds file of seurat object with cell type annotated
1772
+ outfile: The rds file of seurat object with cell type annotated.
1773
+ Note that the reduction name will be `ref.umap` for the mapping.
1774
+ To visualize the mapping, you should use `ref.umap` as the reduction name.
1773
1775
 
1774
1776
  Envs:
1775
1777
  ncores (type=int;order=-100): Number of cores to use.
1776
- Used in `future::plan(strategy = "multicore", workers = <ncores>)`
1778
+ When `split_by` is used, this will be the number of cores for each object to map to the reference.
1779
+ When `split_by` is not used, this is used in `future::plan(strategy = "multicore", workers = <ncores>)`
1777
1780
  to parallelize some Seurat procedures.
1778
- See also: <https://satijalab.org/seurat/articles/future_vignette.html>
1781
+ See also: <https://satijalab.org/seurat/archive/v3.0/future_vignette.html>
1782
+ mutaters (type=json): The mutaters to mutate the metadata.
1783
+ This is helpful when we want to create new columns for `split_by`.
1779
1784
  use: A column name of metadata from the reference
1780
1785
  (e.g. `celltype.l1`, `celltype.l2`) to transfer to the query as the
1781
1786
  cell types (ident) for downstream analysis. This field is required.
@@ -1787,16 +1792,29 @@ class SeuratMap2Ref(Proc):
1787
1792
  `Seurat::LoadH5Seurat()`.
1788
1793
  The file type is determined by the extension. `.rds` or `.RDS` for
1789
1794
  RDS file, `.h5seurat` or `.h5` for h5seurat file.
1795
+ refnorm (choice): Normalization method the reference used. The same method will be used for the query.
1796
+ - NormalizeData: Using [`NormalizeData`](https://satijalab.org/seurat/reference/normalizedata).
1797
+ - SCTransform: Using [`SCTransform`](https://satijalab.org/seurat/reference/sctransform).
1798
+ - auto: Automatically detect the normalization method.
1799
+ If the default assay of reference is `SCT`, then `SCTransform` will be used.
1800
+ split_by: The column name in metadata to split the query into multiple objects.
1801
+ This helps when the original query is too large to process.
1790
1802
  SCTransform (ns): Arguments for [`SCTransform()`](https://satijalab.org/seurat/reference/sctransform)
1791
1803
  - do-correct-umi (flag): Place corrected UMI matrix in assay counts layer?
1792
1804
  - do-scale (flag): Whether to scale residuals to have unit variance?
1793
1805
  - do-center (flag): Whether to center residuals to have mean zero?
1794
1806
  - <more>: See <https://satijalab.org/seurat/reference/sctransform>.
1795
1807
  Note that the hyphen (`-`) will be transformed into `.` for the keys.
1808
+ NormalizeData (ns): Arguments for [`NormalizeData()`](https://satijalab.org/seurat/reference/normalizedata)
1809
+ - normalization-method: Normalization method.
1810
+ - <more>: See <https://satijalab.org/seurat/reference/normalizedata>.
1811
+ Note that the hyphen (`-`) will be transformed into `.` for the keys.
1796
1812
  FindTransferAnchors (ns): Arguments for [`FindTransferAnchors()`](https://satijalab.org/seurat/reference/findtransferanchors)
1797
1813
  - normalization-method (choice): Name of normalization method used.
1798
1814
  - LogNormalize: Log-normalize the data matrix
1799
1815
  - SCT: Scale data using the SCTransform method
1816
+ - auto: Automatically detect the normalization method.
1817
+ See `envs.refnorm`.
1800
1818
  - reference-reduction: Name of dimensional reduction to use from the reference if running the pcaproject workflow.
1801
1819
  Optionally enables reuse of precomputed reference dimensional reduction.
1802
1820
  - <more>: See <https://satijalab.org/seurat/reference/findtransferanchors>.
@@ -1822,14 +1840,19 @@ class SeuratMap2Ref(Proc):
1822
1840
  "ncores": config.misc.ncores,
1823
1841
  "use": None,
1824
1842
  "ident": "seurat_clusters",
1843
+ "mutaters": {},
1825
1844
  "ref": None,
1845
+ "refnorm": "auto",
1846
+ "split_by": None,
1826
1847
  "SCTransform": {
1827
1848
  "do-correct-umi": False,
1828
1849
  "do-scale": False,
1829
1850
  "do-center": True,
1830
1851
  },
1852
+ "NormalizeData": {
1853
+ "normalization-method": "LogNormalize",
1854
+ },
1831
1855
  "FindTransferAnchors": {
1832
- "normalization-method": "SCT",
1833
1856
  "reference-reduction": "spca",
1834
1857
  },
1835
1858
  "MapQuery": {
@@ -566,8 +566,8 @@ class ScrnaMetabolicLandscape(ProcGroup):
566
566
  input_data = lambda ch: tibble(
567
567
  srtobj=ch.iloc[:, 0],
568
568
  metafile=[None],
569
- mutaters=[self.opts.mutaters],
570
569
  )
570
+ envs = {"mutaters": self.opts.mutaters}
571
571
 
572
572
  return MetabolicSeuratMetadataMutater
573
573
 
@@ -577,10 +577,10 @@ class ScrnaMetabolicLandscape(ProcGroup):
577
577
  if self.opts.noimpute:
578
578
  return self.p_mutater
579
579
 
580
- from .scrna import ExprImpution
580
+ from .scrna import ExprImputation
581
581
 
582
582
  @annotate.format_doc(indent=3)
583
- class MetabolicExprImpution(ExprImpution):
583
+ class MetabolicExprImputation(ExprImputation):
584
584
  """{{Summary}}
585
585
 
586
586
  You can turn off the imputation by setting the `noimpute` option
@@ -588,7 +588,7 @@ class ScrnaMetabolicLandscape(ProcGroup):
588
588
  """
589
589
  requires = self.p_mutater
590
590
 
591
- return MetabolicExprImpution
591
+ return MetabolicExprImputation
592
592
 
593
593
  @ProcGroup.add_proc
594
594
  def p_pathway_activity(self) -> Type[Proc]:
@@ -0,0 +1,7 @@
1
+ {% if envs.tool == "rmagic" %}
2
+ {% include biopipen_dir + "/scripts/scrna/ExprImputation-rmagic.R" %}
3
+ {% elif envs.tool == "scimpute" %}
4
+ {% include biopipen_dir + "/scripts/scrna/ExprImputation-scimpute.R" %}
5
+ {% elif envs.tool == "alra" %}
6
+ {% include biopipen_dir + "/scripts/scrna/ExprImputation-alra.R" %}
7
+ {% endif %}
@@ -38,7 +38,7 @@ do_one_stats = function(name) {
38
38
  df_cells = df_cells %>% filter(!!rlang::parse_expr(case$subset))
39
39
  }
40
40
 
41
- select_cols = c(case$ident, case$group.by, case$split.by)
41
+ select_cols = unique(c(case$ident, case$group.by, case$split.by))
42
42
  if (!is.null(case$split.by)) {
43
43
  plot_df = do_call(rbind, lapply(group_split(
44
44
  df_cells %>% select(all_of(select_cols)),
@@ -1,5 +1,6 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
2
 
3
+ library(parallel)
3
4
  library(Seurat)
4
5
  library(SeuratDisk)
5
6
  library(rlang)
@@ -12,8 +13,12 @@ outfile = {{out.outfile | r}}
12
13
  use = {{envs.use | r}}
13
14
  ident = {{envs.ident | r}}
14
15
  ref = {{envs.ref | r}}
16
+ refnorm = {{envs.refnorm | r}}
15
17
  ncores = {{envs.ncores | r}}
18
+ split_by = {{envs.split_by | r}}
19
+ mutaters = {{envs.mutaters | r}}
16
20
  sctransform_args = {{envs.SCTransform | r: todot="-"}}
21
+ normalizedata_args = {{envs.NormalizeData | r: todot="-"}}
17
22
  findtransferanchors_args = {{envs.FindTransferAnchors | r: todot="-"}}
18
23
  mappingscore_args = {{envs.MappingScore | r: todot="-"}}
19
24
  mapquery_args = {{envs.MapQuery | r: todot="-"}}
@@ -34,8 +39,10 @@ if (is.null(mapquery_args$refdata) || length(mapquery_args$refdata) == 0) {
34
39
  mapquery_args$refdata[[use]] = use
35
40
 
36
41
  outdir = dirname(outfile)
37
- options(future.globals.maxSize = 80000 * 1024^2)
38
- plan(strategy = "multicore", workers = ncores)
42
+ if (is.null(split_by)) {
43
+ options(future.globals.maxSize = 80000 * 1024^2)
44
+ future::plan(strategy = "multicore", workers = ncores)
45
+ }
39
46
 
40
47
  .expand_dims = function(args, name = "dims") {
41
48
  # Expand dims from 30 to 1:30
@@ -56,52 +63,191 @@ if (endsWith(ref, ".rds") || endsWith(ref, ".RDS")) {
56
63
  reference = LoadH5Seurat(ref)
57
64
  }
58
65
 
66
+ if (refnorm == "auto" && DefaultAssay(reference) == "SCT") {
67
+ refnorm = "SCTransform"
68
+ }
69
+ log_info(" Normalization method used: {refnorm}")
70
+ if (refnorm == "SCTransform") {
71
+ findtransferanchors_args$normalization.method = "SCT"
72
+ } else if (refnorm == "NormalizeData") {
73
+ findtransferanchors_args$normalization.method = "LogNormalize"
74
+ } else {
75
+ stop("Unknown normalization method: {refnorm}")
76
+ }
77
+
59
78
  # Load Seurat object
60
79
  log_info("- Loading Seurat object")
61
80
  sobj = readRDS(sobjfile)
62
81
 
82
+ if (!is.null(mutaters) && length(mutaters) > 0) {
83
+ log_info("- Applying mutaters")
84
+ sobj@meta.data <- sobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
85
+ }
86
+
87
+ if (!is.null(split_by)) {
88
+ # check if each split has more than 100 cells
89
+ cellno = table(sobj@meta.data[[split_by]])
90
+ cellno = cellno[cellno < 100]
91
+ if (length(cellno) > 0) {
92
+ # stop and print the splits with # cells
93
+ stop(paste0(
94
+ "The following splits have less than 100 cells: \n",
95
+ paste0("- ", names(cellno), ": ", cellno, collapse = "\n"),
96
+ "\n\n",
97
+ "You can use `envs.mutaters` to merge these splits and use `newsplit` as `envs.split_by`: \n",
98
+ "> mutaters = {\n",
99
+ "> newsplit = \"if_else(oldsplit %in% c('split1', 'split2'), 'mergedsplit', oldsplit)\"\n",
100
+ "> }\n"
101
+ ))
102
+ }
103
+ sobj = SplitObject(sobj, split.by = split_by)
104
+ }
105
+
63
106
  # Normalize data
64
107
  log_info("- Normalizing data")
65
- sctransform_args$object = sobj
66
- sctransform_args$residual.features = rownames(x = reference)
67
- query = do_call(SCTransform, sctransform_args)
108
+ if (refnorm == "SCTransform") {
109
+ log_info(" Using SCTransform normalization")
110
+ sctransform_args$residual.features = rownames(x = reference)
111
+ if (is.null(split_by)) {
112
+ sctransform_args$object = sobj
113
+ query = do_call(SCTransform, sctransform_args)
114
+ } else {
115
+ query = mclapply(
116
+ X = sobj,
117
+ FUN = function(x) {
118
+ sctransform_args$object = x
119
+ do_call(SCTransform, sctransform_args)
120
+ },
121
+ mc.cores = ncores
122
+ )
123
+ if (any(unlist(lapply(query, class)) == "try-error")) {
124
+ stop(paste0("\nmclapply (SCTransform) error:", query))
125
+ }
126
+ }
127
+ } else {
128
+ log_info(" Using NormalizeData normalization")
129
+ if (is.null(split_by)) {
130
+ normalizedata_args$object = sobj
131
+ query = do_call(NormalizeData, normalizedata_args)
132
+ } else {
133
+ query = mclapply(
134
+ X = sobj,
135
+ FUN = function(x) {
136
+ normalizedata_args$object = x
137
+ do_call(NormalizeData, normalizedata_args)
138
+ },
139
+ mc.cores = ncores
140
+ )
141
+ if (any(unlist(lapply(query, class)) == "try-error")) {
142
+ stop(paste0("\nmclapply (NormalizeData) error:", query))
143
+ }
144
+ }
145
+ }
68
146
 
69
147
  # Find anchors between query and reference
70
148
  log_info("- Finding anchors")
71
149
  findtransferanchors_args$reference = reference
72
- findtransferanchors_args$query = query
73
- anchors = do_call(FindTransferAnchors, findtransferanchors_args)
150
+ if (is.null(split_by)) {
151
+ findtransferanchors_args$query = query
152
+ anchors = do_call(FindTransferAnchors, findtransferanchors_args)
153
+ } else {
154
+ anchors = mclapply(
155
+ X = query,
156
+ FUN = function(x) {
157
+ findtransferanchors_args$query = x
158
+ do_call(FindTransferAnchors, findtransferanchors_args)
159
+ },
160
+ mc.cores = ncores
161
+ )
162
+ if (any(unlist(lapply(anchors, class)) == "try-error")) {
163
+ stop(paste0("\nmclapply (FindTransferAnchors) error:", anchors))
164
+ }
165
+ }
74
166
 
75
167
  # Map query to reference
76
168
  log_info("- Mapping query to reference")
77
169
  mapquery_args$reference = reference
78
- mapquery_args$query = query
79
- mapquery_args$anchorset = anchors
80
- query = do_call(MapQuery, mapquery_args)
170
+ if (is.null(split_by)) {
171
+ mapquery_args$query = query
172
+ mapquery_args$anchorset = anchors
173
+ query = do_call(MapQuery, mapquery_args)
174
+ } else {
175
+ query = mclapply(
176
+ X = seq_along(query),
177
+ FUN = function(i) {
178
+ mapquery_args$query = query[[i]]
179
+ mapquery_args$anchorset = anchors[[i]]
180
+ do_call(MapQuery, mapquery_args)
181
+ },
182
+ mc.cores = ncores
183
+ )
184
+ if (any(unlist(lapply(query, class)) == "try-error")) {
185
+ stop(paste0("\nmclapply (MapQuery) error:", query))
186
+ }
187
+ }
81
188
 
82
189
  # Calculating mapping score
83
190
  log_info("- Calculating mapping score")
84
- mappingscore_args$anchors = anchors
85
- mappingscore = tryCatch({
86
- do_call(MappingScore, mappingscore_args)
87
- }, error = function(e) {
88
- if (e$message == "subscript out of bounds") {
89
- stop(paste0(
90
- "While calculating mapping score, the following error was encountered: \n",
91
- "subscript out of bounds. \n\n",
92
- "You may want to try a smaller `ndim` (default: 50) in `envs.MappingScore`."
93
- ))
191
+ mappingscore_sob_msg = paste0(
192
+ "While calculating mapping score, the following error was encountered: \n",
193
+ "subscript out of bounds. \n\n",
194
+ "You may want to try a smaller `ndim` (default: 50) in `envs.MappingScore`."
195
+ )
196
+ if (is.null(split_by)) {
197
+ mappingscore_args$anchors = anchors
198
+ mappingscore = tryCatch({
199
+ do_call(MappingScore, mappingscore_args)
200
+ }, error = function(e) {
201
+ if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
202
+ stop(e)
203
+ })
204
+ } else {
205
+ mappingscore = mclapply(
206
+ X = seq_along(query),
207
+ FUN = function(i) {
208
+ mappingscore_args$anchors = anchors[[i]]
209
+ tryCatch({
210
+ do_call(MappingScore, mappingscore_args)
211
+ }, error = function(e) {
212
+ if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
213
+ stop(e)
214
+ })
215
+ },
216
+ mc.cores = ncores
217
+ )
218
+ if (any(unlist(lapply(mappingscore, class)) == "try-error")) {
219
+ stop(paste0("\nmclapply (MappingScore) error:", mappingscore))
94
220
  }
95
- stop(e)
96
- })
221
+ }
97
222
 
98
223
  # Calculate mapping score and add to metadata
99
- log_info("- Calculating mapping score")
100
- query = AddMetaData(
101
- object = query,
102
- metadata = mappingscore,
103
- col.name = "mapping.score"
104
- )
224
+ log_info("- Adding mapping score to metadata")
225
+ if (is.null(split_by)) {
226
+ query = AddMetaData(
227
+ object = query,
228
+ metadata = mappingscore,
229
+ col.name = "mapping.score"
230
+ )
231
+ } else {
232
+ query = mclapply(
233
+ X = seq_along(query),
234
+ FUN = function(i) {
235
+ AddMetaData(
236
+ object = query[[i]],
237
+ metadata = mappingscore[[i]],
238
+ col.name = "mapping.score"
239
+ )
240
+ },
241
+ mc.cores = ncores
242
+ )
243
+ if (any(unlist(lapply(query, class)) == "try-error")) {
244
+ stop(paste0("\nmclapply (AddMetaData) error:", query))
245
+ }
246
+
247
+ # Combine the results
248
+ log_info("- Merging the results")
249
+ query = merge(query[[1]], query[2:length(query)], merge.dr = "ref.umap")
250
+ }
105
251
 
106
252
  # Add the alias to the metadata for the clusters
107
253
  log_info("- Adding ident to metadata and set as ident")
@@ -50,8 +50,18 @@ do_one_group <- function(obj, features, group, outputdir, h1) {
50
50
  classes[classes != group] <- "_REST"
51
51
  classes[classes == group] <- groupname
52
52
  if (any(table(classes) < 5)) {
53
- msg <- paste("Group", group, "has less than 5 cells, or only 5 cells left.")
53
+ msg <- paste(" Skipped. One of the groups has less than 5 cells.")
54
54
  log_warn(msg)
55
+ # write a warning.txt to odir with the message and table(classes)
56
+ write(paste0(msg, "\n\n"), file = file.path(odir, "warning.txt"))
57
+ write.table(
58
+ table(classes),
59
+ file = file.path(odir, "warning.txt"),
60
+ sep = "\t",
61
+ quote = FALSE,
62
+ row.names = FALSE,
63
+ append = TRUE
64
+ )
55
65
  return(
56
66
  list(
57
67
  list(kind = "error", content = msg),
@@ -84,14 +84,18 @@ do_one_comparison <- function(
84
84
 
85
85
  odir = file.path(groupdir, paste0(subset_prefix, compname))
86
86
  dir.create(odir, showWarnings = FALSE)
87
- if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
88
- log_warn(" Skip (not enough cells)")
89
- add_report(
87
+ if (ncol(exprs_case) < 5 || ncol(exprs_control) < 5) {
88
+ log_warn(" Skipped (not enough cells).")
89
+ wfile <- file.path(odir, "warning.txt")
90
+ write("Skipped (not enough cells)\n\n", file = wfile)
91
+ write(paste0("n_cells (Case):", ncol(exprs_case)), file = wfile, append = TRUE)
92
+ write(paste0("n_cells (Control):", ncol(exprs_control)), file = wfile, append = TRUE)
93
+
94
+ return(list(
90
95
  list(kind = "error", content = "Not enough cells"),
91
96
  h1 = groupname,
92
97
  h2 = compname
93
- )
94
- return (NULL)
98
+ ))
95
99
  }
96
100
  if (fgsea) {
97
101
  ranks = prerank(
biopipen/utils/gsea.R CHANGED
@@ -2,11 +2,36 @@ library(ggplot2)
2
2
  library(dplyr)
3
3
  library(tibble)
4
4
 
5
- .slugify <- function(x, non_alphanum_replace="-", collapse_replace=TRUE, tolower=FALSE) {
6
- x <- gsub("[^[:alnum:]_]", non_alphanum_replace, x)
7
- if(collapse_replace) x <- gsub(paste0(non_alphanum_replace, "+"), non_alphanum_replace, x)
8
- if(tolower) x <- tolower(x)
9
- x
5
+ if (!exists("slugify")) {
6
+ slugify <- function(x, non_alphanum_replace="-", collapse_replace=TRUE, tolower=FALSE) {
7
+ subs <- list(
8
+ "š"="s", "œ"="oe", "ž"="z", "ß"="ss", "þ"="y", "à"="a", "á"="a", "â"="a",
9
+ "ã"="a", "ä"="a", "å"="a", "æ"="ae", "ç"="c", "è"="e", "é"="e", "ê"="e",
10
+ "ë"="e", "ì"="i", "í"="i", "î"="i", "ï"="i", "ð"="d", "ñ"="n", "ò"="o",
11
+ "ó"="o", "ô"="o", "õ"="o", "ö"="o", "ø"="oe", "ù"="u", "ú"="u", "û"="u",
12
+ "ü"="u", "ý"="y", "ÿ"="y", "ğ"="g", "ı"="i", "ij"="ij", "ľ"="l", "ň"="n",
13
+ "ř"="r", "ş"="s", "ť"="t", "ų"="u", "ů"="u", "ý"="y", "ź"="z", "ż"="z",
14
+ "ſ"="s", "α"="a", "β"="b", "γ"="g", "δ"="d", "ε"="e", "ζ"="z", "η"="h",
15
+ "θ"="th", "ι"="i", "κ"="k", "λ"="l", "μ"="m", "ν"="n", "ξ"="x", "ο"="o",
16
+ "π"="p", "ρ"="r", "σ"="s", "τ"="t", "υ"="u", "φ"="ph", "χ"="ch", "ψ"="ps",
17
+ "ω"="o", "ά"="a", "έ"="e", "ή"="h", "ί"="i", "ό"="o", "ύ"="u", "ώ"="o",
18
+ "ϐ"="b", "ϑ"="th", "ϒ"="y", "ϕ"="ph", "ϖ"="p", "Ϛ"="st", "ϛ"="st", "Ϝ"="f",
19
+ "ϝ"="f", "Ϟ"="k", "ϟ"="k", "Ϡ"="k", "ϡ"="k", "ϰ"="k", "ϱ"="r", "ϲ"="s",
20
+ "ϳ"="j", "ϴ"="th", "ϵ"="e", "϶"="p"
21
+ )
22
+ # replace latin and greek characters to the closest english character
23
+ for (k in names(subs)) {
24
+ x <- gsub(k, subs[[k]], x)
25
+ }
26
+ x <- gsub("[^[:alnum:]_]", non_alphanum_replace, x)
27
+ if(collapse_replace) x <- gsub(
28
+ paste0(gsub("([][{}()+*^$|\\\\?.])", "\\\\\\1", non_alphanum_replace), "+"),
29
+ non_alphanum_replace,
30
+ x
31
+ )
32
+ if(tolower) x <- tolower(x)
33
+ x
34
+ }
10
35
  }
11
36
 
12
37
  localizeGmtfile <- function(gmturl, cachedir = tempdir()) {
@@ -25,7 +50,12 @@ localizeGmtfile <- function(gmturl, cachedir = tempdir()) {
25
50
  if (nrow(items) == 0) {
26
51
  stop(paste0("Empty GMT file: ", gmtfile, ", from ", gmturl))
27
52
  }
28
- if (nchar(items$V2[1]) < nchar(items$V1[1]) && nchar(items$V2[1]) > 0) {
53
+ if (
54
+ is.character(items$V2[1]) &&
55
+ nchar(items$V2[1]) < nchar(items$V1[1]) &&
56
+ nchar(items$V2[1]) > 0 &&
57
+ is.na(suppressWarnings(as.numeric(items$V2[1])))
58
+ ) {
29
59
  warning(paste0(
30
60
  "The second column is shorter, switching the first and second columns in GMT file ",
31
61
  gmtfile,
@@ -153,7 +183,8 @@ runFGSEA = function(
153
183
 
154
184
  write.table(
155
185
  gsea_res %>%
156
- mutate(leadingEdge = sapply(leadingEdge, function(x) paste(x, collapse=","))),
186
+ mutate(leadingEdge = sapply(leadingEdge, function(x) paste(x, collapse=",")),
187
+ slug = sapply(pathway, slugify)),
157
188
  file = file.path(outdir, "fgsea.txt"),
158
189
  row.names = FALSE,
159
190
  col.names = TRUE,
@@ -172,16 +203,16 @@ runFGSEA = function(
172
203
 
173
204
  tablefig = file.path(outdir, "gsea_table.png")
174
205
  png(tablefig, res=100, width=1000, height=200 + 40 * length(topPathways))
175
- plotGseaTable(
206
+ print(plotGseaTable(
176
207
  envs$pathways[topPathways],
177
208
  ranks,
178
209
  gsea_res,
179
210
  gseaParam = if (!is.null(envs$gseaParam)) envs$gseaParam else 1
180
- )
211
+ ))
181
212
  dev.off()
182
213
 
183
214
  for (pathway in topPathways) {
184
- enrfig = file.path(outdir, paste0("fgsea_", .slugify(pathway), ".png"))
215
+ enrfig = file.path(outdir, paste0("fgsea_", slugify(pathway), ".png"))
185
216
  png(enrfig, res=100, width=1000, height=800)
186
217
  print(plotEnrichment(
187
218
  envs$pathways[[pathway]],
biopipen/utils/misc.R CHANGED
@@ -33,6 +33,7 @@ bQuote <- function(x) {
33
33
  }
34
34
 
35
35
  #' Slugify a string
36
+ #' Remember also update the one in gsea.R
36
37
  #' @param x A string
37
38
  #' @param non_alphanum_replace Replace non-alphanumeric characters
38
39
  #' @param collapse_replace Collapse consecutive non-alphanumeric character replacements
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: biopipen
3
- Version: 0.27.6
3
+ Version: 0.27.8
4
4
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
5
5
  License: MIT
6
6
  Author: pwwang
@@ -1,9 +1,9 @@
1
- biopipen/__init__.py,sha256=BwKhBzWMdVser1JHOUEX0Aa2nBqgua67wsNi17fRle0,23
1
+ biopipen/__init__.py,sha256=tTYofCNMPRiC5Qs7KVRV32NVPrdQ2HjfgoMDf4fChsQ,23
2
2
  biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
4
4
  biopipen/core/config.toml,sha256=20RCI30Peee1EQdfb_UbV3Hf74XUPndJnYZlUThytsw,1781
5
5
  biopipen/core/defaults.py,sha256=yPeehPLk_OYCf71IgRVCWuQRxLAMixDF81Ium0HtPKI,344
6
- biopipen/core/filters.py,sha256=HLrjXGsvvjRtTWIAmg_f4IMymWaRD769HlDwsCTh170,12424
6
+ biopipen/core/filters.py,sha256=-OIzS5F_yNZ0Nk5Ci16BepPWqLOGBmYVqX3W98RSK9Y,12488
7
7
  biopipen/core/proc.py,sha256=60lUP3PcUAaKbDETo9N5PEIoeOYrLgcSmuytmrhcx8g,912
8
8
  biopipen/core/testing.py,sha256=lZ_R5ZbYPO2NPuLHdbzg6HbD_f4j8paVVbyeUqwg6FE,3411
9
9
  biopipen/ns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -21,8 +21,8 @@ biopipen/ns/gsea.py,sha256=EsNRAPYsagaV2KYgr4Jv0KCnZGqayM209v4yOGGTIOI,7423
21
21
  biopipen/ns/misc.py,sha256=fzn0pXvdghMkQhu-e3MMapPNMyO6IAJbtTzVU3GbFa0,3246
22
22
  biopipen/ns/plot.py,sha256=fzJAKKl4a_tsVkLREGCQTFVHP049m33LdWgeYRb6v7M,5483
23
23
  biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
24
- biopipen/ns/scrna.py,sha256=7Gs1xxQoGM3TKxaQvbgKNyMDEsgatFopImzC-RcOEoA,103946
25
- biopipen/ns/scrna_metabolic_landscape.py,sha256=EhOtHQyoH-jRpzDoOI_06UbjEg6mhvbDEHKhek01bPk,28334
24
+ biopipen/ns/scrna.py,sha256=KL5Eu0mnIITLLSHAIz_sgr4ssmEU6AuBDXwedqYU7BI,105633
25
+ biopipen/ns/scrna_metabolic_landscape.py,sha256=6AhaynGG3lNRi96N2tReVT46BJMuEwooSSd2irBoN80,28347
26
26
  biopipen/ns/snp.py,sha256=EQ2FS0trQ7YThPmBVTpS66lc2OSfgQ6lCh6WnyP-C2g,5499
27
27
  biopipen/ns/stats.py,sha256=yJ6C1CXF84T7DDs9mgufqUOr89Rl6kybE5ji8Vnx6cw,13693
28
28
  biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
@@ -126,10 +126,10 @@ biopipen/scripts/scrna/CellTypeAnnotation-sctype.R,sha256=1nKRtzhVoJ9y0yMg1sgI6u
126
126
  biopipen/scripts/scrna/CellTypeAnnotation.R,sha256=OwLM_G4D7TG4HhIJjQxgIQM92X86lsWp9MVyXTTkLSc,618
127
127
  biopipen/scripts/scrna/CellsDistribution.R,sha256=isDr5-EWvOeWwVZdjOSsdX3QUpEaDvQFulIYawqFaQc,18854
128
128
  biopipen/scripts/scrna/DimPlots.R,sha256=-mXOTMnpPxvR30XLjwcohFfFx7xTqWKKiICwJiD6yEo,1554
129
- biopipen/scripts/scrna/ExprImpution-alra.R,sha256=w3W1txJcdWg52-SETY2Z0lO7maDNfiMjBYIGN587YW0,843
130
- biopipen/scripts/scrna/ExprImpution-rmagic.R,sha256=jYIfqZpnvjKJkvItLnemPVtUApHBYQi1_L8rHVbEe1M,735
131
- biopipen/scripts/scrna/ExprImpution-scimpute.R,sha256=mg40qCUW7-nP5oHPvARq7dmtoahM0GRFWXQpum0BXVk,1082
132
- biopipen/scripts/scrna/ExprImpution.R,sha256=7768ezrr59xUZDXq8lO9jj2XhnkSsx-xxBmOD9_DO7c,313
129
+ biopipen/scripts/scrna/ExprImputation-alra.R,sha256=w3W1txJcdWg52-SETY2Z0lO7maDNfiMjBYIGN587YW0,843
130
+ biopipen/scripts/scrna/ExprImputation-rmagic.R,sha256=jYIfqZpnvjKJkvItLnemPVtUApHBYQi1_L8rHVbEe1M,735
131
+ biopipen/scripts/scrna/ExprImputation-scimpute.R,sha256=mg40qCUW7-nP5oHPvARq7dmtoahM0GRFWXQpum0BXVk,1082
132
+ biopipen/scripts/scrna/ExprImputation.R,sha256=GcdZJpkDpq88hRQjtLZY5-byp8V43stEFm5T-pQbU6A,319
133
133
  biopipen/scripts/scrna/MarkersFinder.R,sha256=M7fHTbHHErZ9JbLmjDkx-6yVIay0_h0MkvgFegnqL44,22918
134
134
  biopipen/scripts/scrna/MetaMarkers.R,sha256=9ve1X0TrDzS_ZEW6HtU3n8R-uPx7q-hYMMNFVDSE8wQ,11272
135
135
  biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=JSHd-_-KiFqW8avCGxgU4T-C5BtDr2u0kwIvEu2lFIg,4188
@@ -141,12 +141,12 @@ biopipen/scripts/scrna/SeuratClusterStats-dimplots.R,sha256=gViDgQ8NorYD64iK0Fgc
141
141
  biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=W7iYhaFsC5EMZLO50QukYPLYGK4bq9kQc1VT5FwvI68,15496
142
142
  biopipen/scripts/scrna/SeuratClusterStats-hists.R,sha256=YhuD-GePjJPSkR0iLRgV_hiGHD_bnOIKp-LB6GCwquo,5037
143
143
  biopipen/scripts/scrna/SeuratClusterStats-ngenes.R,sha256=GVKIXFNS_syCuSN8oxoBkjxxAeI5LdSxh-qLVkUsbDA,2146
144
- biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=TxQ0OcLwXwIgwL1mTLArboK0ATJIJhxWiv9DV_jBlhE,9255
144
+ biopipen/scripts/scrna/SeuratClusterStats-stats.R,sha256=bBbvNCvV6dZLg9zvhh2nR48_53md5A5UEqrPXD00MZU,9263
145
145
  biopipen/scripts/scrna/SeuratClusterStats.R,sha256=ouWoj7Q644uG3MUlT23AES8f74g38-jPtPhINSvoUas,1267
146
146
  biopipen/scripts/scrna/SeuratClustering.R,sha256=kAvQq3RV86_KSv9NlUtUeQrPKkbhSsnv6Q4DoiTu8M0,6403
147
147
  biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zykkJtg2lM,509
148
148
  biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
149
- biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=Xn3VnvKqShuC0Ju05380wjuLVSdW0uWVzntdxjme244,4359
149
+ biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=_G8pG7NRV2GOFDzKBLY1nkXR0DO1c-6NkX990_hC8mk,9127
150
150
  biopipen/scripts/scrna/SeuratMetadataMutater.R,sha256=Pp4GsF3hZ6ZC2vroC3LSBmVa4B1p2L3hbh981yaAIeQ,1093
151
151
  biopipen/scripts/scrna/SeuratPreparing.R,sha256=t6GOcc9ZNwpRLeES7uBWja9RF6u6k5I_TXcdK4Ve7d0,18683
152
152
  biopipen/scripts/scrna/SeuratSplit.R,sha256=vdK11V39_Uo_NaOh76QWCtxObGaEr5Ynxqq0hTiSvsU,754
@@ -157,8 +157,8 @@ biopipen/scripts/scrna/Subset10X.R,sha256=T2nJBTwOe12AIKC2FZsMSv6xx3s-67CYZokpz5
157
157
  biopipen/scripts/scrna/TopExpressingGenes.R,sha256=kXMCYHVytgVgO_Uq66fKKFCFV2PPXE8VREy_0yYPLpU,7475
158
158
  biopipen/scripts/scrna/celltypist-wrapper.py,sha256=f5M8f4rU5nC7l17RS0YVmUPpLLz4D6PIcgWtA77UExM,1722
159
159
  biopipen/scripts/scrna/sctype.R,sha256=NaUJkABwF5G1UVm1CCtcMbwLSj94Mo24mbYCKFqo1Bw,6524
160
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R,sha256=nSBNn1BMwqoApTqmvzLeRhFu2JW_mNhOXICxmBYIP6E,4813
161
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R,sha256=ic8Fy8QqYDGh_izmvZVJ3KL66podg_CSF5ITL3FZsvo,5196
160
+ biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R,sha256=sOnHSH0Ld-tXSIXJPnXLYgRtEru5M0g3HRxbrHWQ_0U,5170
161
+ biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R,sha256=RPSxEHWk50Fyw5YPBVVGeWwd55Hi0zjbipLuM5O4tjs,5465
162
162
  biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R,sha256=95DLX1Rz0tobOuDZ8V9YdGgO0KiNthhccoeeOK21tno,16216
163
163
  biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R,sha256=rQ9iwGh9FNRZlJJzM4QItdyXmebfzLAq05ZAjb1kGUw,9831
164
164
  biopipen/scripts/snp/MatrixEQTL.R,sha256=zOR_mhn_sUXuxqgV82TPvDp-E1i5aJVA45QixyRP8no,5930
@@ -230,9 +230,9 @@ biopipen/utils/caching.R,sha256=qANQqH8p-VpvD8V4VSoqSfp0TFr4esujC7x3OFZsJMw,1687
230
230
  biopipen/utils/common_docstrs.py,sha256=Ow-g-yS5P7DEO37cP1X-xioRbYWygfQHxIuLIaDdrjs,6288
231
231
  biopipen/utils/gene.R,sha256=BzAwlLA8hO12vF-3t6IwEuTEeLa_jBll4zm_5qe3qoE,1243
232
232
  biopipen/utils/gene.py,sha256=qE_BqTayrJWxRdniffhcz6OhZcw9GUoOrj2EtFWH9Gw,2246
233
- biopipen/utils/gsea.R,sha256=2sN3AM0XjLWbTv6cB3JHCBWjuhmD4wEjPaaBY7wkhCI,7542
233
+ biopipen/utils/gsea.R,sha256=BUr4pwfo7ZyinOyNa4O4dDxA1c50qWb3lpph374Yw_A,9239
234
234
  biopipen/utils/io.R,sha256=jIYdqdn0iRWfQYAZa5CjXi3fikqmYvPPLIXhobRe8sw,537
235
- biopipen/utils/misc.R,sha256=jXusPDCxSIaYRq_qm4khUsu9nyMhbpBVcj8BVn4j8Ic,10629
235
+ biopipen/utils/misc.R,sha256=Y9J8gZDuPPYFxYNQtDEvKAk5j-K8j_-n0DunYvu_Hv8,10671
236
236
  biopipen/utils/misc.py,sha256=KJziAFY4Kl-0ZsO93vteY9gRLZg9BSYig-TDocHY36k,3601
237
237
  biopipen/utils/mutate_helpers.R,sha256=Bqy6Oi4rrPEPJw0Jq32bVAwwBfZv7JJL9jFcK5x-cek,17649
238
238
  biopipen/utils/plot.R,sha256=pzl37PomNeUZPxohHZ2w93j3Fc4T0Qrc62FF-9MTKdw,4417
@@ -240,7 +240,7 @@ biopipen/utils/reference.py,sha256=6bPSwQa-GiDfr7xLR9a5T64Ey40y24yn3QfQ5wDFZkU,4
240
240
  biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
241
241
  biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
242
242
  biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
243
- biopipen-0.27.6.dist-info/METADATA,sha256=t7ROsmFyR6-E4YXGAwiuNxRjZz5IX6_H7mT1rs9OSfE,882
244
- biopipen-0.27.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
245
- biopipen-0.27.6.dist-info/entry_points.txt,sha256=wu70aoBcv1UahVbB_5237MY-9M9_mzqmWjDD-oi3yz0,621
246
- biopipen-0.27.6.dist-info/RECORD,,
243
+ biopipen-0.27.8.dist-info/METADATA,sha256=3rTtE5ECOr8Y67BOOCAfleIqkVabeEstjmhzKwxUI-w,882
244
+ biopipen-0.27.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
245
+ biopipen-0.27.8.dist-info/entry_points.txt,sha256=wu70aoBcv1UahVbB_5237MY-9M9_mzqmWjDD-oi3yz0,621
246
+ biopipen-0.27.8.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- {% if envs.tool == "rmagic" %}
2
- {% include biopipen_dir + "/scripts/scrna/ExprImpution-rmagic.R" %}
3
- {% elif envs.tool == "scimpute" %}
4
- {% include biopipen_dir + "/scripts/scrna/ExprImpution-scimpute.R" %}
5
- {% elif envs.tool == "alra" %}
6
- {% include biopipen_dir + "/scripts/scrna/ExprImpution-alra.R" %}
7
- {% endif %}