biopipen 0.25.4__py3-none-any.whl → 0.26.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -0,0 +1,136 @@
+ source("{{biopipen_dir}}/utils/misc.R")
+
+ library(rlang)
+ library(dplyr)
+ library(tidyr)
+ library(fastLiquidAssociation)
+
+ infile <- {{in.infile | r}}
+ covfile <- {{in.covfile | r}}
+ groupfile <- {{in.groupfile | r}}
+ fmlfile <- {{in.fmlfile | r}}
+ outfile <- {{out.outfile | r}}
+ x <- {{envs.x | r}}
+ nvec <- {{envs.nvec | r}}
+ topn <- {{envs.topn | r}}
+ rvalue <- {{envs.rvalue | r}}
+ cut <- {{envs.cut | r}}
+ ncores <- {{envs.ncores | r}}
+ padj <- {{envs.padj | r}}
+ transpose_input <- {{envs.transpose_input | r}}
+ transpose_group <- {{envs.transpose_group | r}}
+ transpose_cov <- {{envs.transpose_cov | r}}
+ xyz_names <- {{envs.xyz_names | r}}
+ if (!is.null(xyz_names) && length(xyz_names) == 1) {
+     xyz_names <- trimws(strsplit(xyz_names, ",")[[1]])
+ }
+
+ if (is.null(groupfile) && is.null(nvec)) {
+     stop("Must provide either in.groupfile or envs.nvec")
+ }
+ if (!is.null(groupfile) && !is.null(nvec)) {
+     stop("Must provide either in.groupfile or envs.nvec, not both")
+ }
+
+ log_info("Reading and preparing data ...")
+ indata <- read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
+ if (transpose_input) {
+     indata <- t(indata)
+ }
+ if (!is.null(covfile)) {
+     covdata <- read.table(covfile, header = TRUE, sep = "\t", row.names = 1)
+     if (transpose_cov) {
+         covdata <- t(covdata)
+     }
+     if (!isTRUE(all.equal(rownames(indata), rownames(covdata)))) {
+         stop("Row names of indata and covdata must be identical")
+     }
+     indata <- indata %>% mutate(across(everything(), function(xx) {
+         lm(xx ~ as.matrix(covdata))$residuals
+     }))
+ }
+
+ expand_range <- function(range) {
+     items <- trimws(strsplit(range, ",|-")[[1]])
+     num_items <- as.numeric(items)
+     if (anyNA(num_items)) {
+         # it's sample names
+         return(match(items, colnames(indata)))
+     }
+     return(num_items)
+ }
+
+ cut <- cut %||% max(ceiling(nrow(indata)/22), 4)
+ if (!is.null(x)) { x <- expand_range(x) }
+ if (!is.null(groupfile)) {
+     groupdata <- read.table(groupfile, header = TRUE, sep = "\t", row.names = 1)
+     if (transpose_group) {
+         groupdata <- t(groupdata)
+     }
+     if (!isTRUE(all.equal(rownames(indata), rownames(groupdata)))) {
+         stop("Row names of indata and groupdata must be identical")
+     }
+     nvec <- (ncol(indata) + 1) : (ncol(indata) + ncol(groupdata))
+     indata <- cbind(indata, groupdata)
+ } else {
+     nvec <- expand_range(nvec)
+ }
+
+ log_info("Running fastLiquidAssociation ...")
+ indata <- as.matrix(indata)
+ mla <- fastMLA(
+     data = indata,
+     topn = topn,
+     rvalue = rvalue,
+     cut = cut,
+     threads = ncores,
+     nvec = nvec
+ )
+
+ if (nrow(mla) == 0) {
+     log_warn("No significant associations found")
+     out <- data.frame(
+         X12 = character(),
+         X21 = character(),
+         X3 = character(),
+         rhodiff = numeric(),
+         `MLA.value` = numeric(),
+         estimates = numeric(),
+         `san.se` = numeric(),
+         wald = numeric(),
+         Pval = numeric(),
+         model = character()
+     )
+ } else {
+     cnm <- mass.CNM(data = indata, GLA.mat = mla, nback = topn)
+     out <- cnm$`top p-values` %>%
+         dplyr::select(X12 = "X1 or X2", X21 = "X2 or X1", everything(), Pval = "p value")
+ }
+
+ if (!is.null(fmlfile)) {
+     fmldata <- read.table(fmlfile, header = FALSE, sep = "\t", row.names = NULL)
+     colnames(fmldata) <- c("Z", "X", "Y")
+     all_combns <- fmldata %>% unite("XYZ", X, Y, Z, sep = " // ") %>% pull(XYZ)
+     out <- out %>%
+         unite("XYZ", X12, X21, X3, sep = " // ", remove = FALSE) %>%
+         dplyr::filter(XYZ %in% all_combns) %>%
+         dplyr::select(-XYZ)
+ }
+
+ if (!is.null(xyz_names)) {
+     out <- out %>%
+         dplyr::select(
+             !!sym(xyz_names[1]) := "X12",
+             !!sym(xyz_names[2]) := "X21",
+             !!sym(xyz_names[3]) := "X3",
+             everything()
+         )
+ }
+
+ if (padj != "none") {
+     log_info("Calculating adjusted p-values ...")
+     out$Padj <- p.adjust(out$Pval, method = padj)
+ }
+
+ log_info("Writing output ...")
+ write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
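
When a covariate file is given, the script above regresses every feature column on the covariates and keeps the residuals before the liquid association scan. A minimal, self-contained sketch of that step on toy data (the column names here are made up for illustration, not from the package):

library(dplyr)

set.seed(1)
# toy feature matrix and covariates (hypothetical names)
indata  <- data.frame(gene1 = rnorm(20), gene2 = rnorm(20))
covdata <- data.frame(age = rnorm(20), batch = rnorm(20))

# replace each feature with the residuals of a linear model on the covariates,
# mirroring the mutate(across(...)) call in the script
adjusted <- indata %>%
    mutate(across(everything(), function(xx) {
        lm(xx ~ as.matrix(covdata))$residuals
    }))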
@@ -0,0 +1,128 @@
+ source("{{biopipen_dir}}/utils/misc.R")
+
+ library(metap)
+ library(rlang)
+ library(dplyr)
+
+ infiles <- {{in.infiles | r}}
+ outfile <- {{out.outfile | r}}
+ id_cols <- {{envs.id_cols | r}}
+ id_exprs <- {{envs.id_exprs | r}}
+ pval_cols <- {{envs.pval_cols | r}}
+ method <- {{envs.method | r}}
+ na <- {{envs.na | r}}
+ padj <- {{envs.padj | r}}
+
+ if (method == "fisher") { method = "sumlog" }
+
+ if (length(infiles) == 1 && padj == "none") {
+     log_info("Only one input file, copying to output ...")
+     file.copy(infiles, outfile)
+ } else if (length(infiles) == 1) {
+     log_info("Only one input file, performing p-value adjustment ...")
+     if (is.null(pval_cols)) {
+         stop("Must provide envs.pval_cols")
+     }
+     indata <- read.table(infiles, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
+     if (!pval_cols %in% colnames(indata)) {
+         stop("envs.pval_cols does not exist in input file")
+     }
+     indata$Padj <- p.adjust(indata[, pval_cols], method = padj)
+
+     log_info("Writing output ...")
+     write.table(indata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
+ } else {
+     # Check pval_cols
+     if (is.null(pval_cols)) {
+         stop("Must provide envs.pval_cols")
+     }
+     if (length(pval_cols) == 1) {
+         pval_cols <- trimws(strsplit(pval_cols, ",")[[1]])
+     }
+     if (length(pval_cols) == 1) {
+         pval_cols <- rep(pval_cols, length(infiles))
+     }
+     if (length(pval_cols) != length(infiles)) {
+         stop("envs.pval_cols must be a single name or have the same length as in.infiles")
+     }
+
+     # Check id_cols
+     if (is.null(id_cols)) {
+         stop("Must provide envs.id_cols")
+     }
+     if (length(id_cols) == 1) {
+         id_cols <- trimws(strsplit(id_cols, ",")[[1]])
+     }
+
+     # Check id_exprs
+     if (!is.null(id_exprs)) {
+         if (length(id_exprs) == 1) {
+             id_exprs <- rep(id_exprs, length(infiles))
+         }
+         if (length(id_exprs) != length(infiles)) {
+             stop("envs.id_exprs must be a single expression or have the same length as in.infiles")
+         }
+         if (length(id_cols) != 1) {
+             stop("envs.id_cols must be a single name if envs.id_exprs is provided")
+         }
+     }
+
+     log_info("Reading and preparing data ...")
+     outdata <- NULL
+     for (i in seq_along(infiles)) {
+         infile <- infiles[i]
+         name <- tools::file_path_sans_ext(basename(infile))
+         pval_col <- paste0("Pval_", name)
+         dat <- read.table(
+             infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
+         )
+         if (!is.null(id_exprs)) {
+             dat <- dat %>% mutate(!!sym(id_cols) := !!parse_expr(id_exprs[i]))
+         }
+         dat <- dat %>% dplyr::select(all_of(id_cols), !!sym(pval_col) := !!sym(pval_cols[i]))
+
+         if (is.null(outdata)) {
+             outdata <- dat
+         } else {
+             outdata <- full_join(outdata, dat, by = id_cols)
+         }
+     }
+
+     log_info("Running metap on each row ...")
+     metaps <- c()
+     ns <- c()
+     pval_columns <- setdiff(colnames(outdata), id_cols)
+     for (i in seq_len(nrow(outdata))) {
+         ps <- unlist(outdata[i, pval_columns, drop = TRUE])
+         if (na == -1) {
+             ps <- ps[!is.na(ps)]
+         } else {
+             ps[is.na(ps)] <- na
+         }
+         if (length(ps) == 0) {
+             metaps <- c(metaps, NA)
+             ns <- c(ns, NA)
+         } else if (length(ps) == 1) {
+             metaps <- c(metaps, ps)
+             ns <- c(ns, 1)
+         } else {
+             metaps <- c(metaps, do.call(method, list(ps))$p)
+             ns <- c(ns, length(ps))
+         }
+     }
+     outdata$MetaPval <- metaps
+     outdata$N <- ns
+     outdata <- outdata %>% arrange(MetaPval)
+
+     if (padj != "none") {
+         log_info("Calculating adjusted p-values ...")
+         outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
+
+     }
+
+     log_info("Writing output ...")
+     write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
+ }
+
+
+
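
In the per-row loop above, do.call(method, list(ps))$p relies on the metap convention that each combining function (e.g. sumlog, i.e. Fisher's method, which method = "fisher" is mapped to) returns an object whose p element holds the combined p-value. A minimal sketch with toy p-values:

library(metap)

ps <- c(0.01, 0.04, 0.20)                  # toy per-file p-values for one row
combined <- do.call("sumlog", list(ps))$p  # Fisher's method; $p is the combined p-value
combined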
@@ -116,13 +116,13 @@ get_groups <- function(order) {
  }

  perpare_case <- function(casename, case) {
-     log_info("Preparing case: {casename} ...")
+     log_info("- Processing case: {casename} ...")
      # Check if required keys are provided
      if (is.null(case$subject) || length(case$subject) == 0) {
-         stop(paste("`subject` is required for case:", casename))
+         stop(paste(" `subject` is required for case:", casename))
      }
      if (is.null(case$group) || length(case$group) == 0) {
-         stop(paste("`group` is required for case:", casename))
+         stop(paste(" `group` is required for case:", casename))
      }
      if (!is.null(case$order) && length(case$order) > 0) {
          has_comma <- grepl(",", case$order)
@@ -134,13 +134,8 @@ perpare_case <- function(casename, case) {
              ))
          } else if (!any(has_comma)) {
              if (length(case$order) > 2) {
-                 log_warn(
-                     paste0(
-                         "- Order of groups in case:", casename,
-                         " is not recommended, please use comma to separate groups. \n",
-                         "Instead of `['A', 'B', 'C']`, use `['A,B', 'A,C', 'B,C']`."
-                     )
-                 )
+                 log_warn(" Order of groups is not recommended, please use comma to separate groups.")
+                 log_warn(" Instead of `['A', 'B', 'C']`, use `['A,B', 'A,C', 'B,C']`.")
                  case$order <- sapply(
                      combn(case$order, 2, simplify = FALSE),
                      function(x) paste(x, collapse = ",")
@@ -151,8 +146,8 @@ perpare_case <- function(casename, case) {
          } else {
              stop(
                  paste0(
-                     "- Order of groups in case:", casename,
-                     " is not consistent, please use comma to separate groups. \n",
+                     " Order of groups in case:", casename,
+                     " is not consistent, please use comma to separate groups. ",
                      "Instead of `['A', 'B', 'C']`, use `['A,B', 'A,C', 'B,C']`, ",
                      "however, this is inconsistent: `['A,B', 'C']`"
                  )
@@ -255,14 +250,16 @@ plot_scatter <- function(counts, subject, suf1, suf2) {
      }
      ggplot(plotdata) +
          geom_point(
-             aes_string(
-                 x = bQuote(suf1), y = bQuote(suf2), color = "Type", size = "Size", fill = "Type"
+             aes(
+                 x = !!sym(suf1),
+                 y = !!sym(suf2),
+                 color = Type,
+                 size = Size,
+                 fill = Type
              ),
              alpha = .6,
              shape = 21
          ) +
-         # geom_point(aes_string(x=x, y=y, color='color'), shape=1) +
-         # scale_color_manual(values=color) +
          scale_x_continuous(
              trans = "log2",
              limits = c(minx, maxx),
@@ -277,7 +274,6 @@ plot_scatter <- function(counts, subject, suf1, suf2) {
          ) +
          theme_prism(base_size = 16) +
          scale_size(guide = "none") +
-         # theme(legend.position = "none") +
          labs(
              title = bquote(.(subject) ~ (italic(n) == .(n_formatted))),
              subtitle = subtitle
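
The plot_scatter hunks above swap the deprecated aes_string() for tidy evaluation: the column names held in suf1/suf2 are injected into aes() with !!sym(). A minimal sketch of the same pattern on toy data (the data frame and column names are made up for illustration):

library(ggplot2)
library(rlang)

df <- data.frame(A = 1:5, B = (1:5)^2)  # toy data
xcol <- "A"                             # column names held in variables,
ycol <- "B"                             # like suf1/suf2 in plot_scatter()
p <- ggplot(df) +
    geom_point(aes(x = !!sym(xcol), y = !!sym(ycol)))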
@@ -302,61 +298,38 @@ plot_venndg <- function(counts, groups, singletons) {
      venn <- Venn(venn_data)
      vdata <- process_data(venn)
      vregion <- venn_region(vdata)
-     sregion <- head(vregion, length(groups))
-     sregion$count = singletons[sregion$name, "count"]
-     sregion <- sregion %>% mutate(name = paste0(name, " singletons"))
+     vregion$singleton_count = singletons[vregion$name, "count"]
      vregion <- vregion %>% mutate(
          count_perc = round(count / sum(count) * 100, 1),
-         count_str = paste0(count, " (", count_perc, "%)")
+         count_str = paste0(count, " (", count_perc, "%)"),
+         count_str = if_else(is.na(singleton_count), count_str, paste0(count_str, "\nsingletons = ", singleton_count))
      )

-     # Align the catagory labels
-     cat_nudge_y <- 0
-     if (length(groups) == 3) { cat_nudge_y <- c(-400, 0, -400) }
-     # Shift Count labels
-     count_nudge_y <- -10
-     if (length(groups) == 3) { count_nudge_y <- c(20, -20, 20, rep(0, nrow(vregion) - 3)) }
-     # Shift the singletons stat labels
-     label_nudge_y <- 60
-     if (length(groups) == 3) { label_nudge_y <- c(60, -60, -60) }
-
      venn_p <- ggplot() +
          # 1. region count layer
          geom_sf(aes(fill = count), data = venn_region(vdata)) +
          # 2. set edge layer
          # geom_sf(aes(color = factor(id)), data = venn_setedge(data), show.legend = FALSE) +
          # 3. set label layer
-         geom_sf_text(aes(label = name), data = venn_setlabel(vdata), nudge_y = cat_nudge_y) +
+         geom_sf_text(aes(label = name), data = venn_setlabel(vdata)) +
          # 4. region label layer
          geom_sf_label(
              aes(label = count_str),
              alpha = .8,
              label.padding = unit(.2, "lines"),
-             data = vregion,
-             nudge_y = count_nudge_y
+             data = vregion
          ) +
          # 5. singletons label layer
          scale_fill_distiller(palette = "Oranges", direction = 1) +
-         new_scale_fill() +
-         geom_sf_label(
-             aes(label = count, fill = name),
-             alpha = .6,
-             data = sregion,
-             nudge_y = label_nudge_y,
-             label.padding = unit(1, "lines"),
-             label.r = unit(1.2, "lines"),
-             label.size = 0.05,
-             show.legend = TRUE
-         ) +
          theme_void() +
-         theme(plot.margin = margin(1,1,1,1, "cm")) +
-         scale_fill_brewer(palette = "Reds", name = "Singletons")
+         theme(plot.margin = margin(1,1,1,1, "cm"))

      venn_p
  }

  plot_upset <- function(counts, singletons) {
      query_singleton <- function(row) { row["Singletons"] == "true" }
+     query_multiplet <- function(row) { rep(TRUE, length(row)) }

      cnts <- column_to_rownames(counts, "CDR3.aa") %>%
          mutate(across(everything(), ~ as.integer(as.logical(.x))))
@@ -365,7 +338,19 @@ plot_upset <- function(counts, singletons) {
      cnts[sgltns, "Singletons"] <- "true"
      sets <- setdiff(colnames(cnts), "Singletons")

+     # Fix: Error in fix.by(by.x, x) : 'by' must specify uniquely valid columns
+     colnames(cnts) <- make.names(colnames(cnts))
+     sets <- make.names(sets)
+
      upset(cnts, sets = sets, query.legend = "top", sets.x.label = "# clones", queries = list(
+         list(
+             # in order to add legend
+             # actually mark all, but singleton will override
+             query = query_multiplet,
+             color = "#3b3b3b",
+             active = TRUE,
+             query.name = "Multiplets"
+         ),
          list(
              query = query_singleton,
              color = "orange",
@@ -407,7 +392,7 @@ handle_subject <- function(i, subjects, casename, case) {
          mutate(across(everything(), as.character)) %>%
          paste(collapse = "-")

-     log_info("Handling {i} {case$subject} ...")
+     log_info(" Handling {subject} ({i}/{nrow(subjects)}) ...")

      if (!is.null(case$subset)) {
          counts <- cldata %>% filter(!!parse_expr(case$subset))
@@ -432,7 +417,7 @@ handle_subject <- function(i, subjects, casename, case) {
          case$order <- sapply(combn(groups, 2, simplify = FALSE), function(x) paste(x, collapse = ","))
      }
      if (length(unique(counts[[case$group]])) < 2) {
-         log_warn("{casename}, Subject doesn't have enough groups: {subject}")
+         log_warn(" - Subject doesn't have enough groups: {subject}")
          return()
      }
      singletons = counts %>%
@@ -452,20 +437,6 @@ handle_subject <- function(i, subjects, casename, case) {
          select(CDR3.aa, !!!syms(groups))
      counts[is.na(counts)] <- 0

-     # # Save samples to group_by so they can be aligned accordingly in the report
-     # if (!is.null(section)) {
-     # group_dir <- file.path(casedir, "section")
-     # dir.create(group_dir, showWarnings = FALSE)
-
-     # sgroups <- subject_row %>%
-     # left_join(cldata) %>%
-     # pull(section) %>%
-     # unique() %>%
-     # paste(collapse = "-")
-     # group_file <- file.path(group_dir, paste0(slugify(sgroups), ".txt"))
-     # cat(subject, file = group_file, sep = "\n", append = TRUE)
-     # }
-
      # Save counts
      counts_dir <- file.path(casedir, "counts")
      countfile <- file.path(counts_dir, paste0(slugify(subject), ".txt"))
@@ -495,13 +466,7 @@ handle_subject <- function(i, subjects, casename, case) {
      for (j in seq_along(case$order)) {
          pair <- strsplit(case$order[j], ",")[[1]]
          if (length(setdiff(pair, groups)) > 0) {
-             log_warn(
-                 paste0(
-                     "- One of the comparisons doesn't exist in case (", casename,
-                     ") for subject (", subject, "): ",
-                     case$order[j]
-                 )
-             )
+             log_warn(" - Comparison {case$order[j]} doesn't exist.")
              next
          }
          scatter_p <- plot_scatter(counts, subject, pair[1], pair[2])
@@ -534,7 +499,7 @@ handle_subject <- function(i, subjects, casename, case) {

      h <- headings(case$section, casename, "Overlapping Clones (Venn Diagram)")
      add_report(
-         list(src = venn_png),
+         list(src = venn_png, name = subject),
          h1 = h$h1,
          h2 = h$h2,
          h3 = h$h3,
@@ -549,7 +514,7 @@ handle_subject <- function(i, subjects, casename, case) {

      h <- headings(case$section, casename, "Overlapping Clones (UpSet Plots)")
      add_report(
-         list(src = upset_png),
+         list(src = upset_png, name = subject),
          h1 = h$h1,
          h2 = h$h2,
          h3 = h$h3,
biopipen/utils/misc.R CHANGED
@@ -29,6 +29,25 @@ bQuote <- function(x) {
  #' @param tolower Convert to lowercase
  #' @return A slugified string
  slugify <- function(x, non_alphanum_replace="-", collapse_replace=TRUE, tolower=FALSE) {
+     subs <- list(
+         "š"="s", "œ"="oe", "ž"="z", "ß"="ss", "þ"="y", "à"="a", "á"="a", "â"="a",
+         "ã"="a", "ä"="a", "å"="a", "æ"="ae", "ç"="c", "è"="e", "é"="e", "ê"="e",
+         "ë"="e", "ì"="i", "í"="i", "î"="i", "ï"="i", "ð"="d", "ñ"="n", "ò"="o",
+         "ó"="o", "ô"="o", "õ"="o", "ö"="o", "ø"="oe", "ù"="u", "ú"="u", "û"="u",
+         "ü"="u", "ý"="y", "ÿ"="y", "ğ"="g", "ı"="i", "ij"="ij", "ľ"="l", "ň"="n",
+         "ř"="r", "ş"="s", "ť"="t", "ų"="u", "ů"="u", "ý"="y", "ź"="z", "ż"="z",
+         "ſ"="s", "α"="a", "β"="b", "γ"="g", "δ"="d", "ε"="e", "ζ"="z", "η"="h",
+         "θ"="th", "ι"="i", "κ"="k", "λ"="l", "μ"="m", "ν"="n", "ξ"="x", "ο"="o",
+         "π"="p", "ρ"="r", "σ"="s", "τ"="t", "υ"="u", "φ"="ph", "χ"="ch", "ψ"="ps",
+         "ω"="o", "ά"="a", "έ"="e", "ή"="h", "ί"="i", "ό"="o", "ύ"="u", "ώ"="o",
+         "ϐ"="b", "ϑ"="th", "ϒ"="y", "ϕ"="ph", "ϖ"="p", "Ϛ"="st", "ϛ"="st", "Ϝ"="f",
+         "ϝ"="f", "Ϟ"="k", "ϟ"="k", "Ϡ"="k", "ϡ"="k", "ϰ"="k", "ϱ"="r", "ϲ"="s",
+         "ϳ"="j", "ϴ"="th", "ϵ"="e", "϶"="p"
+     )
+     # replace latin and greek characters to the closest english character
+     for (k in names(subs)) {
+         x <- gsub(k, subs[[k]], x)
+     }
      x <- gsub("[^[:alnum:]_]", non_alphanum_replace, x)
      if(collapse_replace) x <- gsub(paste0(non_alphanum_replace, "+"), non_alphanum_replace, x)
      if(tolower) x <- tolower(x)
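
The lines added to slugify() above transliterate Latin and Greek characters before the existing non-alphanumeric replacement runs. A rough, self-contained illustration of the same loop on a toy string (only two substitutions shown):

# toy subset of the substitution table added above
subs <- list("é" = "e", "ß" = "ss")
x <- "Café Straße"
for (k in names(subs)) {
    x <- gsub(k, subs[[k]], x)
}
x <- gsub("[^[:alnum:]_]", "-", x)  # the pre-existing replacement step
x                                   # "Cafe-Strasse"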
biopipen/utils/misc.py CHANGED
@@ -2,9 +2,24 @@ from __future__ import annotations
  from pathlib import Path

  import sys
+ import logging
  from typing import List
  from biopipen.core.filters import dict_to_cli_args # noqa: F401

+ logger = logging.getLogger("biopipen_job")
+ logger.setLevel(logging.INFO)
+ _handler = logging.StreamHandler(sys.stdout)
+ # Use same log format as in R
+ # {sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}
+ # so the logs can be populated by pipen-poplog
+ _handler.setFormatter(
+     logging.Formatter(
+         "%(levelname)-7s [%(asctime)s] %(message)s",
+         datefmt="%Y-%m-%d %H:%M:%S",
+     )
+ )
+ logger.addHandler(_handler)
+

  def exec_code(code, global_vars=None, local_vars=None, return_var=None):
      global_vars = global_vars or {}
@@ -1,23 +1,22 @@
  Metadata-Version: 2.1
  Name: biopipen
- Version: 0.25.4
+ Version: 0.26.1
  Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
  License: MIT
  Author: pwwang
  Author-email: pwwang@pwwang.com
- Requires-Python: >=3.8,<4.0
+ Requires-Python: >=3.9,<4.0
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Provides-Extra: runinfo
- Requires-Dist: datar[pandas] (>=0.15.4,<0.16.0)
- Requires-Dist: pipen-board[report] (>=0.14,<0.15)
- Requires-Dist: pipen-cli-run (>=0.12,<0.13)
- Requires-Dist: pipen-filters (>=0.11,<0.12)
- Requires-Dist: pipen-poplog (>=0.0.2,<0.0.3)
- Requires-Dist: pipen-runinfo (>=0.5,<0.6) ; extra == "runinfo"
- Requires-Dist: pipen-verbose (>=0.10,<0.11)
+ Requires-Dist: datar[pandas] (>=0.15.5,<0.16.0)
+ Requires-Dist: pipen-board[report] (>=0.15,<0.16)
+ Requires-Dist: pipen-cli-run (>=0.13,<0.14)
+ Requires-Dist: pipen-filters (>=0.12,<0.13)
+ Requires-Dist: pipen-poplog (>=0.1,<0.2)
+ Requires-Dist: pipen-runinfo (>=0.6,<0.7) ; extra == "runinfo"
+ Requires-Dist: pipen-verbose (>=0.11,<0.12)