@platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
WARN Issue while reading "/home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
|
|
2
|
+
|
|
3
|
+
> @platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software@1.7.0 build /home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/software
|
|
4
|
+
> pl-pkg build
|
|
5
|
+
|
|
6
|
+
[32minfo[39m: Rendering entrypoint descriptors...
|
|
7
|
+
[32minfo[39m: Writing entrypoint descriptor to '/home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/software/dist/tengo/software/main.sw.json'
|
|
8
|
+
[32minfo[39m: Building software package 'main' for platform 'linux-x64'...
|
|
9
|
+
[32minfo[39m: packing software into a package
|
|
10
|
+
[32minfo[39m: generating cross-platform package
|
|
11
|
+
[32minfo[39m: software package was written to '/home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/software/pkg-platforma-open-milaboratories.run-diff-clonotype-abundance-deseq2-r.software-main-1.7.0.tgz'
|
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"name":"@platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software:main","binary":{"type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software/main/1.7.0.tgz","cmd":["Rscript","{pkg}/run_onlyDESeq2.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin"},"toolset":"renv","dependencies":{}}}
|
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software",
|
|
3
|
+
"version": "1.7.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Block Software: Run DESeq2 with R",
|
|
6
|
+
"block-software": {
|
|
7
|
+
"entrypoints": {
|
|
8
|
+
"main": {
|
|
9
|
+
"binary": {
|
|
10
|
+
"artifact": {
|
|
11
|
+
"type": "R",
|
|
12
|
+
"registry": "platforma-open",
|
|
13
|
+
"environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
|
|
14
|
+
"root": "./src"
|
|
15
|
+
},
|
|
16
|
+
"cmd": [
|
|
17
|
+
"Rscript",
|
|
18
|
+
"{pkg}/run_onlyDESeq2.R"
|
|
19
|
+
]
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"devDependencies": {
|
|
25
|
+
"@platforma-open/milaboratories.runenv-r-differential-expression": "^1.0.6",
|
|
26
|
+
"@platforma-sdk/package-builder": "^2.15.6"
|
|
27
|
+
},
|
|
28
|
+
"scripts": {
|
|
29
|
+
"build": "pl-pkg build",
|
|
30
|
+
"do-pack": "rm -f *.tgz && pl-pkg build && pnpm pack && mv platforma-open*.tgz package.tgz",
|
|
31
|
+
"changeset": "changeset",
|
|
32
|
+
"version-packages": "changeset version"
|
|
33
|
+
}
|
|
34
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env Rscript
|
|
2
|
+
|
|
3
|
+
# Load libraries
|
|
4
|
+
suppressMessages(library("optparse"))
|
|
5
|
+
suppressMessages(library("tidyr"))
|
|
6
|
+
suppressMessages(library("dplyr"))
|
|
7
|
+
suppressMessages(library("DESeq2"))
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Command-line interface ----
# Every option is parsed by optparse into `opt`; options whose default is NULL
# are treated as required further down in the script.
option_list <- list(
  make_option(c("-c", "--count_matrix"), type = "character", default = NULL,
              help = "Path to count matrix CSV file", metavar = "character"),
  make_option(c("-m", "--metadata"), type = "character", default = NULL,
              help = "Path to metadata CSV file", metavar = "character"),
  make_option(c("-t", "--contrast_factor"), type = "character", default = NULL,
              help = "Column name in metadata for the contrast",
              metavar = "character"),
  make_option(c("-n", "--numerator"), type = "character", default = NULL,
              help = "Numerator level for contrast factor",
              metavar = "character"),
  make_option(c("-d", "--denominator"), type = "character", default = NULL,
              help = "Denominator level for contrast factor",
              metavar = "character"),
  make_option(c("-o", "--output"), type = "character",
              default = "deseq2_results.csv",
              help = "Output CSV file for results", metavar = "character"),
  # Compared against log2FoldChange (not padj) when labelling regulation and
  # when filtering the significant set.
  make_option(c("-f", "--fc_threshold"), type = "double", default = 1,
              help = "Absolute log2 fold change threshold for significance"),
  make_option(c("-p", "--p_threshold"), type = "double", default = 0.05,
              help = "Adjusted p-value threshold for significance"),
  make_option(c("-v", "--values_column"), type = "character",
              default = "Number of UMIs",
              help = "Name of column containing counts"),
  make_option(c("-i", "--IDs_column"), type = "character",
              default = "Clonotype key",
              help = "Name of column containing gene/clonotype IDs"),
  make_option(
    c("-x", "--min_counts"), type = "double", default = 5,
    help = "minimum number of counts in fraction of samples defined by fraction_for_filter"
  ),
  make_option(
    c("-y", "--fraction_for_filter"), type = "double", default = 0.9,
    help = "Fraction of samples that should have more than X min_counts to be accepted in analysis"
  )
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)
|
|
46
|
+
|
|
47
|
+
# Required options have no default, so they are NULL when not supplied.
# Report exactly which ones are missing instead of a generic message.
required_args <- c("count_matrix", "metadata", "contrast_factor",
                   "numerator", "denominator")
arg_is_missing <- vapply(
  required_args,
  function(arg) is.null(opt[[arg]]),
  logical(1)
)
if (any(arg_is_missing)) {
  stop(
    "Missing required arguments: ",
    paste0("--", required_args[arg_is_missing], collapse = ", "),
    call. = FALSE
  )
}
|
|
50
|
+
|
|
51
|
+
# Load count matrix and covariates metadata
# check.names = FALSE keeps headers such as "Number of UMIs" verbatim instead
# of rewriting them into syntactic R names.
count_long <- read.csv(opt$count_matrix, check.names = FALSE)
# The first metadata column becomes the row names
# (presumably the sample IDs -- confirm against the producer of this file).
metadata <- read.csv(opt$metadata, row.names = 1, check.names = FALSE)

# Rename some input variables
values_col <- opt$values_column
ids_col <- opt$IDs_column
min_counts <- opt$min_counts
filter_fraction <- opt$fraction_for_filter

# Fail early with a readable message when the long count table lacks a column
# the script relies on: "Sample" is used as the pivot key, ids_col supplies the
# row names and values_col supplies the counts.
required_count_cols <- c("Sample", ids_col, values_col)
missing_count_cols <- setdiff(required_count_cols, colnames(count_long))
if (length(missing_count_cols) > 0) {
  stop(
    "Count matrix is missing required column(s): ",
    paste(missing_count_cols, collapse = ", "),
    call. = FALSE
  )
}
|
|
60
|
+
|
|
61
|
+
# Validate contrast factor
# Checks run against the original (un-sanitised) metadata column names, i.e.
# exactly what the user passed on the command line.
if (!(opt$contrast_factor %in% colnames(metadata))) {
  stop("Contrast factor column not found in metadata")
}
contrast_values <- metadata[[opt$contrast_factor]]
if (!(opt$numerator %in% contrast_values)) {
  stop("Numerator not found in contrast factor column")
}
if (!(opt$denominator %in% contrast_values)) {
  stop("Denominator not found in contrast factor column")
}
# A level cannot be contrasted with itself; stop here rather than after the
# model has been fitted.
if (identical(opt$numerator, opt$denominator)) {
  stop("Numerator and denominator must be different levels")
}

# Make the column names syntactically valid so they can be pasted into the
# design formula; the contrast factor name is sanitised the same way when the
# results are extracted.
colnames(metadata) <- make.names(colnames(metadata))
|
|
73
|
+
|
|
74
|
+
# Transform long format to wide format (one column per value of `Sample`)
# all_of() tells tidyselect that values_col is a variable holding a column
# name, rather than a column literally named "values_col".
count_matrix <- count_long %>%
  pivot_wider(names_from = Sample, values_from = all_of(values_col)) %>%
  as.data.frame()

# Set ID column as row names and remove it.
# The column is dropped by name, not by position, so the result does not depend
# on where the ID column sits in the input; drop = FALSE keeps a data frame
# even when only one other column remains.
rownames(count_matrix) <- count_matrix[[ids_col]]
count_matrix <- count_matrix[
  , setdiff(colnames(count_matrix), ids_col), drop = FALSE
]

# Convert NA values to zero (ID/sample combinations absent from the long table)
count_matrix[is.na(count_matrix)] <- 0

# Apply filter by low counts: keep a row only if at least `min_samples`
# columns hold a value >= min_counts. `min_samples` is the requested fraction
# of columns, rounded down, but never less than one.
min_samples <- max(floor(ncol(count_matrix) * filter_fraction), 1)
keep_rows <- rowSums(count_matrix >= min_counts) >= min_samples
count_matrix <- count_matrix[keep_rows, , drop = FALSE]

if (nrow(count_matrix) == 0) {
  stop(
    "No rows left after low-count filtering; ",
    "lower --min_counts or --fraction_for_filter",
    call. = FALSE
  )
}
|
|
89
|
+
|
|
90
|
+
# Prepare DESeq2 dataset
# Count columns and metadata rows are paired by position, so both must describe
# the same samples in the same order. Verify the sample sets agree, then put
# the count columns into metadata row order.
metadata_samples <- rownames(metadata)
count_samples <- colnames(count_matrix)
unmatched_samples <- union(
  setdiff(count_samples, metadata_samples),
  setdiff(metadata_samples, count_samples)
)
if (length(unmatched_samples) > 0) {
  stop(
    "Samples differ between count matrix and metadata: ",
    paste(unmatched_samples, collapse = ", "),
    call. = FALSE
  )
}
count_matrix <- count_matrix[, metadata_samples, drop = FALSE]

# The design contains every metadata column as an additive term.
design_formula <- as.formula(
  paste("~", paste(colnames(metadata), collapse = " + "))
)
dds <- DESeqDataSetFromMatrix(
  countData = count_matrix,
  colData = metadata,
  design = design_formula
)
dds <- DESeq(dds, fitType = "local")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Pull the results table for the requested contrast. The factor name goes
# through make.names() so it matches the sanitised metadata column names.
contrast_spec <- c(
  make.names(opt$contrast_factor),
  opt$numerator,
  opt$denominator
)
res <- results(dds, contrast = contrast_spec)
res_df <- as.data.frame(res)

# Tidy table: expose the row names as an ID column and add -log10(padj).
# Rows whose padj is NA keep NA in the derived column.
res_df[[ids_col]] <- rownames(res_df)
res_df$minlog10padj <- -log10(res_df$padj)

# Regulation direction is derived from the log2 fold change only:
# "Up" at or above +fc_threshold, "Down" at or below -fc_threshold, else "NS".
lfc <- res_df$log2FoldChange
res_df$Regulation <- ifelse(
  lfc >= opt$fc_threshold,
  "Up",
  ifelse(lfc <= -opt$fc_threshold, "Down", "NS")
)

# Put the ID and Regulation columns first, keeping the rest in their order
leading_cols <- c(ids_col, "Regulation")
res_df <- res_df[, c(leading_cols, setdiff(colnames(res_df), leading_cols))]

# Write the full table to the requested output path
write.csv(res_df, opt$output, row.names = FALSE)
cat("Full results saved to", opt$output, "\n")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Filter DEGs with adjusted p-value <= p_threshold and
# absolute log2FoldChange >= fc_threshold.
# which() discards rows where the condition evaluates to NA (e.g. padj is NA),
# so no all-NA placeholder rows are created and no clean-up pass is needed.
is_significant <- res_df$padj <= opt$p_threshold &
  abs(res_df$log2FoldChange) >= opt$fc_threshold
deg_df <- res_df[
  which(is_significant),
  c(ids_col, "log2FoldChange", "Regulation"),
  drop = FALSE
]

# Save the significant subset as csv (file name is fixed)
write.csv(deg_df, "DEG.csv", row.names = FALSE)
cat("Filtered DEGs saved to", "DEG.csv", "\n")
|