biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +307 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +14 -2
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  73. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  74. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  75. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  76. biopipen/scripts/scrna/RadarPlots.R +1 -1
  77. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  78. biopipen/scripts/scrna/ScSimulation.R +11 -10
  79. biopipen/scripts/scrna/ScVelo.py +605 -0
  80. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  81. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  82. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  83. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  84. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  85. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  86. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  87. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  88. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  89. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  90. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  91. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  92. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  93. biopipen/scripts/scrna/Subset10X.R +2 -2
  94. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  95. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  96. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  99. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  100. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  101. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  102. biopipen/scripts/snp/PlinkFreq.R +34 -41
  103. biopipen/scripts/snp/PlinkHWE.R +23 -18
  104. biopipen/scripts/snp/PlinkHet.R +26 -22
  105. biopipen/scripts/snp/PlinkIBD.R +30 -34
  106. biopipen/scripts/stats/ChowTest.R +9 -8
  107. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  108. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  109. biopipen/scripts/stats/Mediation.R +8 -8
  110. biopipen/scripts/stats/MetaPvalue.R +11 -13
  111. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  112. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  113. biopipen/scripts/tcr/ClonalStats.R +5 -4
  114. biopipen/scripts/tcr/CloneResidency.R +3 -3
  115. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  116. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  117. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  118. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  119. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  120. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  121. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  122. biopipen/scripts/tcr/TCRClustering.R +86 -97
  123. biopipen/scripts/tcr/TESSA.R +65 -115
  124. biopipen/scripts/tcr/VJUsage.R +5 -5
  125. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  126. biopipen/utils/common_docstrs.py +66 -63
  127. biopipen/utils/reporter.py +177 -0
  128. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  129. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
  130. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  131. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,30 +1,106 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(SeuratWrappers)
4
2
  library(Seurat)
3
+ library(purrr)
4
+ library(stringr)
5
+ library(biopipen.utils)
5
6
 
6
7
  infile = {{in.infile | r}}
7
8
  outfile = {{out.outfile | r}}
8
9
  envs = {{envs.alra_args | r}}
9
10
 
10
- log_info("Loading Seurat object")
11
- sobj = readRDS(infile)
11
+ log <- get_logger()
12
+
13
+ log$info("Loading Seurat object")
14
+ sobj <- read_obj(infile)
12
15
  assay <- DefaultAssay(sobj)
13
16
 
14
- log_info("Imputing expression values, using ALRA")
15
- envs$object = sobj
17
+ # https://github.com/mojaveazure/seurat-disk/issues/102
18
+ # https://github.com/simoncmo/shared_seurat_scripts/blob/main/function_seurat_janitor.R
19
+ # Try to fix the issue with SCTModel
20
+ log$info("Trying to fix SCTModel issue (see mojaveazure/seurat-disk#102)")
21
+ # --------------------------------------------------------------------------
22
+ # Handle missing median_umi
23
+ fix_median_umi = function(SCTModel_obj){
24
+ err_message = ''
25
+ tryCatch({ test <- methods::validObject(SCTModel_obj) },
26
+ error = function(error_message) {
27
+ err_message <<- as.character(error_message)
28
+ })
29
+ missing_medium_umi = stringr::str_detect(err_message, 'median_umi')
30
+
31
+ if(missing_medium_umi){
32
+ message('Missing medium_umi, calculate again from cell.attributes$umi')
33
+ slot(SCTModel_obj, 'median_umi') = median(SCTModel_obj@cell.attributes$umi)
34
+ }
35
+ return(SCTModel_obj)
36
+ }
37
+
38
+ # Cleaning empty objects
39
+ # General purpose
40
+ clean_seurat_obj_list = function(obj_list, attirbute_to_check){
41
+ if(missing(attirbute_to_check)) {stop("Need attributes to check for cleaning")}
42
+ # Object type
43
+ obj_type = class(obj_list[[1]])[[1]]
44
+
45
+ # Count
46
+ obj_size = unlist(purrr::map(obj_list, function(object){
47
+ nrow(slot(object, attirbute_to_check))
48
+ }))
49
+
50
+ # Remove empty
51
+ if(length(obj_size ==0) != 0 ){
52
+ message(str_glue('Removing {length(obj_size ==0)} empty object from the {obj_type} object list'))
53
+ obj_list = obj_list[obj_size!=0]
54
+ message(str_glue('{length(obj_list)} {obj_type} object(s) left'))
55
+ }
56
+ obj_list
57
+ }
58
+
59
+ # for SCTModel.list slot
60
+ clean_seurat_SCTModel_list = function(sct_model_list){
61
+ clean_seurat_obj_list(obj_list = sct_model_list, attirbute_to_check = 'cell.attributes')
62
+ }
63
+
64
+ fix_seurat_SCT = function(obj){
65
+ # Check first
66
+ if(!'SCT' %in% Assays(obj)){
67
+ message('SCT assay not found. Nothing to fix')
68
+ return(obj)
69
+ }
70
+
71
+ # Model list
72
+ sct_model_list = obj$SCT@SCTModel.list
73
+ # 1. clean SCTModel list
74
+ sct_model_list = clean_seurat_SCTModel_list(sct_model_list)
75
+
76
+ # 2. fix missing median_umi
77
+ sct_model_list = map(sct_model_list, function(sct_model){
78
+ fix_median_umi(sct_model)
79
+ })
80
+
81
+ # Add back and return
82
+ obj$SCT@SCTModel.list = sct_model_list
83
+
84
+ return(obj)
85
+ }
86
+ # --------------------------------------------------------------------------
87
+ sobj = fix_seurat_SCT(sobj)
88
+
89
+ log$info("Imputing expression values, using ALRA")
90
+ envs$object <- sobj
16
91
  sobj = do_call(RunALRA, envs)
92
+ envs$object <- NULL
93
+ gc()
17
94
 
18
- # sobj = RunALRA(sobj)
19
- log_info("Renaming assays")
95
+ log$info("Renaming assays")
20
96
  sobj = RenameAssays(sobj, assay.name = assay, new.assay.name = "RAW")
21
97
  sobj = RenameAssays(sobj, assay.name = "alra", new.assay.name = assay)
22
98
  DefaultAssay(sobj) <- assay
23
99
 
24
- sobj@misc$impute = "alra"
100
+ sobj@misc$impute_method = "alra"
25
101
 
26
- log_info("Saving Seurat object")
27
- saveRDS(sobj, outfile)
102
+ log$info("Saving Seurat object")
103
+ save_obj(sobj, outfile)
28
104
 
29
105
  # choosek_plot_file = file.path(dirname(outfile), "choosek.png")
30
106
  # png(choosek_plot_file, width = 1200, height = 1000, res = 100)
@@ -1,30 +1,256 @@
1
+ tryCatch(
2
+ {
3
+ # in order to load Rmagic
4
+ workdir <- {{ job.outdir | r }}
5
+ conda_prefix <- Sys.getenv("CONDA_PREFIX")
6
+ setwd(workdir)
7
+ if (!dir.exists("miniconda3")) {
8
+ file.symlink(conda_prefix, "miniconda3")
9
+ }
10
+ },
11
+ error = function(e) {}
12
+ )
1
13
 
2
- tryCatch({
3
- # in order to load Rmagic
4
- workdir = {{job.outdir | r}}
5
- conda_prefix = Sys.getenv("CONDA_PREFIX")
6
- setwd(workdir)
7
- file.symlink(conda_prefix, "miniconda3")
8
- }, error=function(e) {})
9
-
10
- python = {{envs.rmagic_args.python | r}}
11
- Sys.setenv(RETICULATE_PYTHON = Sys.which(python))
12
- # reticulate::use_python(python)
14
+ python <- {{ envs.rmagic_args.python | r }}
15
+ Sys.setenv(RETICULATE_PYTHON = ifelse(grepl("/", python, fixed = TRUE), python, Sys.which(python)))
16
+ # reticulate::use_python(python, require = TRUE)
13
17
 
14
18
  library(Rmagic)
19
+ Rmagic:::load_pymagic()
20
+ pymagic <- tryCatch({
21
+ Rmagic:::pymagic
22
+ }, error = function(e) {
23
+ NULL
24
+ })
25
+ if (is.null(pymagic)) {
26
+ stop("Failed to load pymagic module. Please check your Python environment.\n ",
27
+ "Current python used by reticulate: ", reticulate::py_config()$python)
28
+ }
29
+
30
+ library(Matrix)
15
31
  library(Seurat)
32
+ library(biopipen.utils)
33
+
34
+ log <- get_logger()
35
+
36
+ infile <- {{ in.infile | r }}
37
+ outfile <- {{ out.outfile | r }}
38
+ threshold <- {{ envs.rmagic_args.threshold | r }}
39
+
40
+ log$info("Loading Seurat object ...")
41
+ sobj <- read_obj(infile)
42
+
43
+ if (threshold > 0) {
44
+ # only impute the genes whose dropout rate (fraction of cells with a zero value) is greater than threshold
45
+ log$info("Fetching genes with expression great than threshold ({threshold}) ...")
46
+ # get the expression matrix
47
+ layers <- Layers(sobj)
48
+ layer <- ifelse(!"counts" %in% layers, "data", "counts")
49
+ counts <- GetAssayData(sobj, layer = layer)
50
+ # Fraction of cells in which each gene has a zero value (i.e. the per-gene dropout rate)
51
+ dropout_rates <- Matrix::rowSums(counts == 0) / ncol(counts)
52
+
53
+ # Genes to impute
54
+ genes_to_impute <- names(dropout_rates[dropout_rates > threshold])
55
+
56
+ log$info("- Will impute for {length(genes_to_impute)}/{length(dropout_rates)} genes ...")
57
+ rm(counts)
58
+ rm(dropout_rates)
59
+ gc()
60
+ } else {
61
+ genes_to_impute <- NULL
62
+ }
63
+
64
+ # get the expression matrix
65
+ data_impute <- t(GetAssayData(sobj, layer = "data"))
66
+
67
+ log$info("Running MAGIC ...")
68
+ check.int.or.null <- function(x) {
69
+ if (is.numeric(x = x)) {
70
+ x <- as.integer(x = x)
71
+ } else if (!is.null(x = x) && is.na(x = x)) {
72
+ x <- NULL
73
+ }
74
+ x
75
+ }
76
+
77
+ check.double.or.null <- function(x) {
78
+ if (is.numeric(x = x)) {
79
+ x <- as.integer(x = x)
80
+ } else if (!is.null(x = x) && is.na(x = x)) {
81
+ x <- NULL
82
+ }
83
+ x
84
+ }
85
+
86
+ check.int.or.string <- function(x, str) {
87
+ if (is.numeric(x = x)) {
88
+ x <- as.integer(x = x)
89
+ } else if (is.null(x = x) || is.na(x = x)) {
90
+ x <- str
91
+ }
92
+ x
93
+ }
94
+ # the magic function is defined in the Rmagic package
95
+ # it has a bug at line 138 when genes are given as a character vector
96
+ # See also https://github.com/KrishnaswamyLab/MAGIC/issues/227
97
+ magic_patched <- function(
98
+ data,
99
+ genes = NULL,
100
+ knn = 5,
101
+ knn.max = NULL,
102
+ decay = 1,
103
+ t = 3,
104
+ npca = 100,
105
+ solver = "exact",
106
+ init = NULL,
107
+ t.max = 20,
108
+ knn.dist.method = "euclidean",
109
+ verbose = 1,
110
+ n.jobs = 1,
111
+ seed = NULL,
112
+ # deprecated args
113
+ k = NULL, alpha = NULL,
114
+ ...) {
115
+ # check installation
116
+ # if (!reticulate::py_module_available(module = "magic") ||
117
+ # !exists("pymagic") || is.null(pymagic)) {
118
+ # Rmagic:::load_pymagic()
119
+ # }
120
+ # check for deprecated arguments
121
+ if (!is.null(k)) {
122
+ message("Argument k is deprecated. Using knn instead.")
123
+ knn <- k
124
+ }
125
+ if (!is.null(alpha)) {
126
+ message("Argument alpha is deprecated. Using decay instead.")
127
+ decay <- alpha
128
+ }
129
+ # validate parameters
130
+ knn <- as.integer(x = knn)
131
+ t.max <- as.integer(x = t.max)
132
+ n.jobs <- as.integer(x = n.jobs)
133
+ npca <- check.int.or.null(npca)
134
+ knn.max <- check.int.or.null(knn.max)
135
+ seed <- check.int.or.null(seed)
136
+ verbose <- check.int.or.null(verbose)
137
+ decay <- check.double.or.null(decay)
138
+ t <- check.int.or.string(t, "auto")
139
+ if (!methods::is(object = data, "Matrix")) {
140
+ data <- as.matrix(x = data)
141
+ }
142
+ # if (length(genes) <= 1 && (is.null(x = genes) || is.na(x = genes))) {
143
+ # ^^^^^^^^^^^^^^^^ bug here
144
+ if (length(genes) <= 1 && (is.null(x = genes) || (length(genes) == 1 && is.na(x = genes)))) {
145
+ genes <- NULL
146
+ gene_names <- colnames(x = data)
147
+ } else if (is.numeric(x = genes)) {
148
+ gene_names <- colnames(x = data)[genes]
149
+ genes <- as.integer(x = genes - 1)
150
+ } else if (length(x = genes) == 1 && genes == "all_genes") {
151
+ gene_names <- colnames(x = data)
152
+ } else if (length(x = genes) == 1 && genes == "pca_only") {
153
+ gene_names <- paste0("PC", 1:npca)
154
+ } else {
155
+ # character vector
156
+ if (!all(genes %in% colnames(x = data))) {
157
+ warning(paste0(
158
+ "Genes ",
159
+ genes[!(genes %in% colnames(data))],
160
+ " not found.",
161
+ collapse = ", "
162
+ ))
163
+ }
164
+ genes <- which(x = colnames(x = data) %in% genes)
165
+ gene_names <- colnames(x = data)[genes]
166
+ genes <- as.integer(x = genes - 1)
167
+ }
168
+ # store parameters
169
+ params <- list(
170
+ "data" = data,
171
+ "knn" = knn,
172
+ "knn.max" = knn.max,
173
+ "decay" = decay,
174
+ "t" = t,
175
+ "npca" = npca,
176
+ "solver" = solver,
177
+ "knn.dist.method" = knn.dist.method
178
+ )
179
+ # use pre-initialized values if given
180
+ operator <- NULL
181
+ if (!is.null(x = init)) {
182
+ if (!methods::is(init, "magic")) {
183
+ warning("object passed to init is not a phate object")
184
+ } else {
185
+ operator <- init$operator
186
+ operator$set_params(
187
+ knn = knn,
188
+ knn_max = knn.max,
189
+ decay = decay,
190
+ t = t,
191
+ n_pca = npca,
192
+ solver = solver,
193
+ knn_dist = knn.dist.method,
194
+ n_jobs = n.jobs,
195
+ random_state = seed,
196
+ verbose = verbose,
197
+ ...
198
+ )
199
+ }
200
+ }
201
+ if (is.null(x = operator)) {
202
+ operator <- pymagic$MAGIC(
203
+ knn = knn,
204
+ knn_max = knn.max,
205
+ decay = decay,
206
+ t = t,
207
+ n_pca = npca,
208
+ solver = solver,
209
+ knn_dist = knn.dist.method,
210
+ n_jobs = n.jobs,
211
+ random_state = seed,
212
+ verbose = verbose,
213
+ ...
214
+ )
215
+ }
216
+ result <- operator$fit_transform(
217
+ data,
218
+ genes = genes,
219
+ t_max = t.max
220
+ )
221
+ colnames(x = result) <- gene_names
222
+ rownames(x = result) <- rownames(data)
223
+ result <- as.data.frame(x = result)
224
+ result <- list(
225
+ "result" = result,
226
+ "operator" = operator,
227
+ "params" = params
228
+ )
229
+ class(x = result) <- c("magic", "list")
230
+ return(result)
231
+ }
16
232
 
17
- infile <- {{in.infile | r}}
18
- outfile <- {{out.outfile | r}}
233
+ data_impute <- magic_patched(data_impute, genes = genes_to_impute)
19
234
 
20
- sobj <- readRDS(infile)
21
- assay <- DefaultAssay(sobj)
235
+ if (threshold > 0) {
236
+ data <- t(GetAssayData(sobj, layer = "data"))
237
+ data_impute <- cbind(data[, setdiff(colnames(data), genes_to_impute)], Matrix::as.matrix(data_impute$result))
238
+ rm(data)
239
+ gc()
240
+ } else {
241
+ # if threshold is 0, then we need to transpose the data back
242
+ data_impute <- t(Matrix::as.matrix(data_impute$result))
243
+ }
22
244
 
23
- sobj <- magic(sobj)
24
- sobj <- RenameAssays(sobj, assay.name = assay, new.assay.name = "RAW")
25
- sobj <- RenameAssays(sobj, assay.name = "MAGIC_RNA", new.assay.name = assay)
245
+ log$info("Adding imputed data to Seurat object ...")
246
+ # Add imputed data to the Seurat object
247
+ sobj <- SetAssayData(
248
+ sobj,
249
+ layer = "data",
250
+ new.data = t(data_impute)
251
+ )
26
252
 
27
- DefaultAssay(sobj) <- assay
253
+ sobj@misc$impute_method <- "rmagic"
28
254
 
29
- attr(sobj, "impute") = "rmagic"
30
- saveRDS(sobj, outfile)
255
+ log$info("Saving Seurat object ...")
256
+ save_obj(sobj, outfile)
@@ -3,7 +3,7 @@ library(Seurat)
3
3
 
4
4
  infile = {{in.infile | r}}
5
5
  outfile = {{out.outfile | r}}
6
- joboutdir = "{{job.outdir}}/"
6
+ joboutdir = {{job.outdir | append: "/" | r}}
7
7
  drop_thre = {{envs.scimpute_args.drop_thre | r}}
8
8
  kcluster = {{(envs.scimpute_args.kcluster | default: None | r}}
9
9
  ncores = {{envs.scimpute_args.ncores | r}}
@@ -12,7 +12,7 @@ refgene = {{envs.scimpute_args.refgene | r}}
12
12
  setwd(joboutdir)
13
13
 
14
14
  labels = NULL
15
- sobj = readRDS(infile)
15
+ sobj = read_obj(infile)
16
16
  counts = as.data.frame(sobj@assays$RNA@counts)
17
17
  kc = length(unique(Idents(sobj)))
18
18
  if (kc > 0) {
@@ -38,6 +38,9 @@ scimpute(
38
38
  imputed = readRDS(file.path(joboutdir, "scimpute_count.rds"))
39
39
  outobj = CreateSeuratObject(counts = imputed)
40
40
 
41
- outobj@meta.data = sobj@meta.data[rownames(outobj@meta.data),,drop=FALSE]
42
- attr(outobj, "impute") = "scimpute"
43
- saveRDS(outobj, outfile)
41
+ outobj@meta.data = sobj@meta.data[rownames(outobj@meta.data), , drop=FALSE]
42
+ # remember that it is the counts being imputed, we still need to
43
+ # normalize the data
44
+ outobj@misc$impute_method = "scimpute"
45
+
46
+ save_obj(outobj, outfile)
@@ -0,0 +1,51 @@
1
+ library(loomR)
2
+ library(DropletUtils)
3
+ library(Matrix)
4
+
5
+ loomfile <- {{in.loomfile | r}}
6
+ outdir <- {{out.outdir | r}}
7
+
8
+ lfile <- connect(filename = loomfile, mode = "r")
9
+
10
+ # Extract the expression matrix (genes x cells)
11
+ expr_matrix <- t(lfile[["matrix"]][, ])
12
+ if (!inherits(expr_matrix, "dgCMatrix")) {
13
+ expr_matrix <- Matrix::Matrix(expr_matrix, sparse = TRUE)
14
+ }
15
+
16
+ # Extract gene names and IDs
17
+ gene_names <- lfile[["row_attrs/Gene"]][]
18
+
19
+ gene_ids <- tryCatch({
20
+ lfile[["row_attrs/GeneID"]][]
21
+ }, error = function(e) {
22
+ NULL
23
+ })
24
+
25
+ if (is.null(gene_ids)) {
26
+ gene_ids <- gene_names
27
+ }
28
+
29
+ # Extract cell barcodes
30
+ cell_barcodes <- lfile[["col_attrs/CellID"]][]
31
+
32
+ # Close the LOOM file connection
33
+ lfile$close_all()
34
+
35
+ # Create a data frame for gene information
36
+ gene_info <- data.frame(
37
+ gene_id = gene_ids,
38
+ gene_name = gene_names
39
+ )
40
+
41
+ # Write the data to 10X format
42
+
43
+ write10xCounts(
44
+ path = outdir,
45
+ x = expr_matrix,
46
+ gene.id = gene_info$gene_id,
47
+ gene.symbol = gene_info$gene_name,
48
+ barcodes = cell_barcodes,
49
+ version = "3",
50
+ overwrite = TRUE
51
+ )