@platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+  WARN  Issue while reading "/home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/.npmrc". Failed to replace env in config: ${NPMJS_TOKEN}
2
+
3
+ > @platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software@1.7.0 build /home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/software
4
+ > pl-pkg build
5
+
6
+ info: Rendering entrypoint descriptors...
7
+ info: Writing entrypoint descriptor to '/home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/software/dist/tengo/software/main.sw.json'
8
+ info: Building software package 'main' for platform 'linux-x64'...
9
+ info: packing software into a package
10
+ info: generating cross-platform package
11
+ info: software package was written to '/home/runner/work/differential-clonotype-abundance/differential-clonotype-abundance/software/pkg-platforma-open-milaboratories.run-diff-clonotype-abundance-deseq2-r.software-main-1.7.0.tgz'
package/CHANGELOG.md ADDED
@@ -0,0 +1,14 @@
1
+ # @platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software
2
+
3
+ ## 1.7.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 2086193: Fix software naming
8
+
9
+ ## 1.6.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 83b4ffc: Package updates
14
+ - e2f5931: First MVB
@@ -0,0 +1 @@
1
+ {"name":"@platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software:main","binary":{"type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software/main/1.7.0.tgz","cmd":["Rscript","{pkg}/run_onlyDESeq2.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin"},"toolset":"renv","dependencies":{}}}
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "@platforma-open/milaboratories.run-diff-clonotype-abundance-deseq2-r.software",
3
+ "version": "1.7.0",
4
+ "type": "module",
5
+ "description": "Block Software: Run DESeq2 with R",
6
+ "block-software": {
7
+ "entrypoints": {
8
+ "main": {
9
+ "binary": {
10
+ "artifact": {
11
+ "type": "R",
12
+ "registry": "platforma-open",
13
+ "environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
14
+ "root": "./src"
15
+ },
16
+ "cmd": [
17
+ "Rscript",
18
+ "{pkg}/run_onlyDESeq2.R"
19
+ ]
20
+ }
21
+ }
22
+ }
23
+ },
24
+ "devDependencies": {
25
+ "@platforma-open/milaboratories.runenv-r-differential-expression": "^1.0.6",
26
+ "@platforma-sdk/package-builder": "^2.15.6"
27
+ },
28
+ "scripts": {
29
+ "build": "pl-pkg build",
30
+ "do-pack": "rm -f *.tgz && pl-pkg build && pnpm pack && mv platforma-open*.tgz package.tgz",
31
+ "changeset": "changeset",
32
+ "version-packages": "changeset version"
33
+ }
34
+ }
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env Rscript
2
+
3
+ # Load libraries
4
+ suppressMessages(library("optparse"))
5
+ suppressMessages(library("tidyr"))
6
+ suppressMessages(library("dplyr"))
7
+ suppressMessages(library("DESeq2"))
8
+
9
+
# Command-line interface: input files, contrast definition, significance
# thresholds, input column names and low-count filter settings.
option_list <- list(
  make_option(c("-c", "--count_matrix"), type = "character", default = NULL,
              help = "Path to count matrix CSV file", metavar = "character"),
  make_option(c("-m", "--metadata"), type = "character", default = NULL,
              help = "Path to metadata CSV file", metavar = "character"),
  make_option(c("-t", "--contrast_factor"), type = "character", default = NULL,
              help = "Column name in metadata for the contrast",
              metavar = "character"),
  make_option(c("-n", "--numerator"), type = "character", default = NULL,
              help = "Numerator level for contrast factor",
              metavar = "character"),
  make_option(c("-d", "--denominator"), type = "character", default = NULL,
              help = "Denominator level for contrast factor",
              metavar = "character"),
  make_option(c("-o", "--output"), type = "character",
              default = "deseq2_results.csv",
              help = "Output CSV file for results", metavar = "character"),
  # The help text used to be a copy of the --p_threshold description; this
  # option is the cutoff applied to log2FoldChange, not to the p-value.
  make_option(c("-f", "--fc_threshold"), type = "double", default = 1,
              help = "Absolute log2 fold change threshold for significance"),
  make_option(c("-p", "--p_threshold"), type = "double", default = 0.05,
              help = "Adjusted p-value threshold for significance"),
  make_option(c("-v", "--values_column"), type = "character",
              default = "Number of UMIs",
              help = "Name of column containing counts"),
  make_option(c("-i", "--IDs_column"), type = "character",
              default = "Clonotype key",
              help = "Name of column containing gene/clonotype IDs"),
  make_option(c("-x", "--min_counts"), type = "double", default = 5,
              help = "minimum number of counts in fraction of samples defined by fraction_for_filter"),
  make_option(c("-y", "--fraction_for_filter"), type = "double", default = 0.9,
              help = "Fraction of samples that should have more than X min_counts to be accepted in analysis")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# These options default to NULL, so a NULL value means the caller did not
# supply them. Report exactly which ones are missing instead of a generic
# message.
required_args <- c("count_matrix", "metadata", "contrast_factor",
                   "numerator", "denominator")
is_missing <- vapply(required_args,
                     function(arg) is.null(opt[[arg]]),
                     logical(1))
if (any(is_missing)) {
  stop("Missing required arguments: ",
       paste0("--", required_args[is_missing], collapse = ", "),
       call. = FALSE)
}
50
+
# Read both inputs. check.names = FALSE keeps the column headers exactly as
# written in the files; the first metadata column becomes the row names.
count_long <- read.csv(file = opt$count_matrix, check.names = FALSE)
metadata <- read.csv(file = opt$metadata, row.names = 1, check.names = FALSE)

# Short aliases for options used further down the script
ids_col <- opt$IDs_column
values_col <- opt$values_column
filter_fraction <- opt$fraction_for_filter
min_counts <- opt$min_counts

# The contrast column must exist, and both requested levels must occur in it.
# This runs before the column names are sanitised, so the name is compared
# as given on the command line.
if (!(opt$contrast_factor %in% colnames(metadata))) {
  stop("Contrast factor column not found in metadata")
}
observed_levels <- metadata[[opt$contrast_factor]]
level_errors <- c(
  numerator = "Numerator not found in contrast factor column",
  denominator = "Denominator not found in contrast factor column"
)
for (level_role in names(level_errors)) {
  if (!(opt[[level_role]] %in% observed_levels)) {
    stop(level_errors[[level_role]])
  }
}

# Turn the metadata headers into syntactically valid R names so they can be
# used as terms of a model formula.
names(metadata) <- make.names(names(metadata))
73
+
# Fail early with a readable message when an expected input column is absent
# ("Sample" is the column the reshape below spreads into one column each).
required_cols <- c("Sample", ids_col, values_col)
missing_cols <- setdiff(required_cols, colnames(count_long))
if (length(missing_cols) > 0) {
  stop("Columns not found in count matrix file: ",
       paste(missing_cols, collapse = ", "),
       call. = FALSE)
}

# Transform long format to wide format: one row per ID, one column per sample.
# all_of() states explicitly that values_col holds a column *name*, which
# avoids tidyselect's ambiguity/deprecation for bare external vectors.
count_matrix <- count_long %>%
  pivot_wider(names_from = Sample, values_from = all_of(values_col)) %>%
  as.data.frame()

# Use the IDs as row names, then drop the ID column by name (it is not
# guaranteed to be the first column). drop = FALSE keeps a data frame even
# when only one sample column remains.
rownames(count_matrix) <- count_matrix[, ids_col]
count_matrix <- count_matrix[, setdiff(colnames(count_matrix), ids_col),
                             drop = FALSE]

# ID/sample combinations absent from the long table become NA after the
# pivot; treat them as zero counts.
count_matrix[is.na(count_matrix)] <- 0

# Low-count filter: keep IDs with at least min_counts in at least
# floor(n_samples * filter_fraction) samples, and never fewer than one sample.
min_samples <- max(floor(ncol(count_matrix) * filter_fraction), 1)
keep_rows <- rowSums(count_matrix >= min_counts) >= min_samples
count_matrix <- count_matrix[keep_rows, , drop = FALSE]
89
+
# Align the sample annotations with the count matrix. DESeq2 pairs colData
# rows with countData columns by position, so the metadata is reordered (and
# restricted) to the samples that actually have counts.
sample_ids <- colnames(count_matrix)
unannotated_samples <- setdiff(sample_ids, rownames(metadata))
if (length(unannotated_samples) > 0) {
  stop("Samples missing from metadata: ",
       paste(unannotated_samples, collapse = ", "),
       call. = FALSE)
}
metadata <- metadata[sample_ids, , drop = FALSE]

# results(contrast = c(factor, numerator, denominator)) needs a factor, so
# force the contrast column to be one; numeric group codes would otherwise be
# modelled as a continuous covariate. The name is sanitised the same way the
# metadata column names were.
contrast_col <- make.names(opt$contrast_factor)
metadata[[contrast_col]] <- factor(metadata[[contrast_col]])
absent_levels <- setdiff(c(opt$numerator, opt$denominator),
                         levels(metadata[[contrast_col]]))
if (length(absent_levels) > 0) {
  stop("Contrast levels not present among the samples with counts: ",
       paste(absent_levels, collapse = ", "),
       call. = FALSE)
}

# Prepare DESeq2 dataset; every metadata column enters the design as an
# additive term.
dds <- DESeqDataSetFromMatrix(
  countData = count_matrix,
  colData = metadata,
  design = as.formula(paste("~", paste(colnames(metadata), collapse = " + ")))
)
dds <- DESeq(dds, fitType = "local")

# Extract the results table for numerator vs denominator
res <- results(dds,
               contrast = c(contrast_col, opt$numerator, opt$denominator))
res_df <- as.data.frame(res)
102
+
# Tidy table: expose the IDs (held as row names) as a regular column
res_df[[ids_col]] <- rownames(res_df)

# -log10 of the adjusted p-value; rows whose padj is NA stay NA.
# (The previous `x[is.na(x)] <- NA` follow-up was a no-op and is removed.)
res_df$minlog10padj <- -log10(res_df$padj)

# Add regulation direction. It is based on log2FoldChange only; padj is not
# consulted here. Rows with NA log2FoldChange get NA.
fc_cutoff <- opt$fc_threshold
res_df$Regulation <- ifelse(
  res_df$log2FoldChange >= fc_cutoff, "Up",
  ifelse(res_df$log2FoldChange <= -fc_cutoff, "Down", "NS")
)

# Reorder columns: ID and Regulation first, everything else in original order
leading_cols <- c(ids_col, "Regulation")
res_df <- res_df[, c(leading_cols, setdiff(colnames(res_df), leading_cols))]
117
+
# Save the full results table as csv
write.csv(res_df, opt$output, row.names = FALSE)
cat("Full results saved to", opt$output, "\n")

# Keep entries with padj <= p_threshold and |log2FoldChange| >= fc_threshold
# (both bounds inclusive). which() discards comparisons that evaluate to NA,
# so no all-NA rows are created and no clean-up by ID is needed afterwards.
is_deg <- res_df$padj <= opt$p_threshold &
  abs(res_df$log2FoldChange) >= opt$fc_threshold
deg_df <- res_df[which(is_deg), c(ids_col, "log2FoldChange", "Regulation")]

# Save the filtered table as csv
write.csv(deg_df, "DEG.csv", row.names = FALSE)
cat("Filtered DEGs saved to", "DEG.csv", "\n")