@platforma-open/milaboratories.run-tcrdisco-enrichment.software 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +2183 -0
- package/CHANGELOG.md +7 -0
- package/Dockerfile +56 -0
- package/dist/artifacts/get-enriched-frequencies/archive.json +1 -0
- package/dist/artifacts/get-enriched-frequencies/docker_x64.json +1 -0
- package/dist/artifacts/subset-assignment/archive.json +1 -0
- package/dist/artifacts/subset-assignment/docker_x64.json +1 -0
- package/dist/artifacts/tcr-ab-pairs/archive.json +1 -0
- package/dist/artifacts/tcr-ab-pairs/docker_x64.json +1 -0
- package/dist/artifacts/tcr-disco/archive.json +1 -0
- package/dist/artifacts/tcr-disco/docker_x64.json +1 -0
- package/dist/tengo/software/get-enriched-frequencies.sw.json +1 -0
- package/dist/tengo/software/subset-assignment.sw.json +1 -0
- package/dist/tengo/software/tcr-ab-pairs.sw.json +1 -0
- package/dist/tengo/software/tcr-disco.sw.json +1 -0
- package/package.json +116 -0
- package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-get-enriched-frequencies-1.1.0.tgz +0 -0
- package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-subset-assignment-1.1.0.tgz +0 -0
- package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-tcr-ab-pairs-1.1.0.tgz +0 -0
- package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-tcr-disco-1.1.0.tgz +0 -0
- package/src/tcr-disco/find-pairs.R +271 -0
- package/src/tcr-disco/get-enriched-frequencies.R +102 -0
- package/src/tcr-disco/main.R +291 -0
- package/src/tcr-disco/renv.lock +6375 -0
- package/src/tcr-disco/subset-assignment.R +194 -0
package/CHANGELOG.md
ADDED
package/Dockerfile
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
FROM r-base:4.4.2
|
|
2
|
+
|
|
3
|
+
ENV R_VERSION=4.4.2
|
|
4
|
+
ENV BIOCONDUCTOR_VERSION=3.20
|
|
5
|
+
ENV RENV_VERSION=1.0.11
|
|
6
|
+
ENV RENV_CONFIG_PPM_ENABLED=false
|
|
7
|
+
ENV RENV_CONFIG_REPOS_OVERRIDE=https://cloud.r-project.org
|
|
8
|
+
|
|
9
|
+
# Install system dependencies required for R packages
|
|
10
|
+
RUN apt-get update && apt-get install -y \
|
|
11
|
+
libcurl4-openssl-dev \
|
|
12
|
+
libssl-dev \
|
|
13
|
+
libxml2-dev \
|
|
14
|
+
libcairo2-dev \
|
|
15
|
+
libgit2-dev \
|
|
16
|
+
default-libmysqlclient-dev \
|
|
17
|
+
libpq-dev \
|
|
18
|
+
libsasl2-dev \
|
|
19
|
+
libsqlite3-dev \
|
|
20
|
+
libssh2-1-dev \
|
|
21
|
+
unixodbc-dev \
|
|
22
|
+
libharfbuzz-dev \
|
|
23
|
+
libfribidi-dev \
|
|
24
|
+
libfreetype6-dev \
|
|
25
|
+
libpng-dev \
|
|
26
|
+
libtiff5-dev \
|
|
27
|
+
libjpeg-dev \
|
|
28
|
+
libgsl-dev \
|
|
29
|
+
libbz2-dev \
|
|
30
|
+
liblzma-dev \
|
|
31
|
+
libpcre2-dev \
|
|
32
|
+
&& apt-get clean \
|
|
33
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
34
|
+
|
|
35
|
+
# Set working directory
|
|
36
|
+
WORKDIR /app
|
|
37
|
+
|
|
38
|
+
RUN R -e "install.packages('renv', repos = 'https://cloud.r-project.org')"
|
|
39
|
+
|
|
40
|
+
# .Rprofile breaks renv restore
|
|
41
|
+
# Copy only files that are needed
|
|
42
|
+
COPY *.R .
|
|
43
|
+
COPY ./renv.lock ./renv.lock
|
|
44
|
+
|
|
45
|
+
# Have to use absolute paths because during execution the working directory will be changed
|
|
46
|
+
# and realpath command does not work
|
|
47
|
+
ENV RENV_PATHS_RENV=/app/renv
|
|
48
|
+
ENV RENV_PATHS_LOCKFILE=/app/renv.lock
|
|
49
|
+
|
|
50
|
+
RUN R --no-echo -e "renv::restore(clean = TRUE)"
|
|
51
|
+
|
|
52
|
+
RUN echo "#!/bin/bash\nR --no-echo --no-restore -e \"renv::restore()\"\n\"\$@\"" > /app/run.sh
|
|
53
|
+
RUN chmod +x /app/run.sh
|
|
54
|
+
|
|
55
|
+
# Entrypoint wrapper: runs renv::restore() and then executes the command passed to the container
|
|
56
|
+
ENTRYPOINT ["/app/run.sh"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/get-enriched-frequencies/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/get-enriched-frequencies/1.1.0.tgz"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.get-enriched-frequencies.c8e04e155280"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/subset-assignment/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/subset-assignment/1.1.0.tgz"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.subset-assignment.eb67f96e9c9a"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-ab-pairs/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-ab-pairs/1.1.0.tgz"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-ab-pairs.2e1cbb240fae"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-disco/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-disco/1.1.0.tgz"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-disco.af738a23e37d"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:get-enriched-frequencies","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.get-enriched-frequencies.c8e04e155280","entrypoint":[],"cmd":["/app/get-enriched-frequencies.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/get-enriched-frequencies/1.1.0.tgz","cmd":["Rscript","{pkg}/get-enriched-frequencies.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:subset-assignment","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.subset-assignment.eb67f96e9c9a","entrypoint":[],"cmd":["/app/main.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/subset-assignment/1.1.0.tgz","cmd":["Rscript","{pkg}/subset-assignment.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:tcr-ab-pairs","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-ab-pairs.2e1cbb240fae","entrypoint":[],"cmd":["/app/find-pairs.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-ab-pairs/1.1.0.tgz","cmd":["Rscript","{pkg}/find-pairs.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:tcr-disco","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-disco.af738a23e37d","entrypoint":[],"cmd":["/app/main.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-disco/1.1.0.tgz","cmd":["Rscript","{pkg}/main.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
|
package/package.json
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@platforma-open/milaboratories.run-tcrdisco-enrichment.software",
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Block Software: Run TCR Disco with R",
|
|
6
|
+
"block-software": {
|
|
7
|
+
"entrypoints": {
|
|
8
|
+
"subset-assignment": {
|
|
9
|
+
"binary": {
|
|
10
|
+
"artifact": {
|
|
11
|
+
"type": "R",
|
|
12
|
+
"registry": "platforma-open",
|
|
13
|
+
"environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
|
|
14
|
+
"root": "./src/tcr-disco"
|
|
15
|
+
},
|
|
16
|
+
"cmd": [
|
|
17
|
+
"Rscript",
|
|
18
|
+
"{pkg}/subset-assignment.R"
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
"docker": {
|
|
22
|
+
"artifact": {
|
|
23
|
+
"type": "docker",
|
|
24
|
+
"context": "./src/tcr-disco",
|
|
25
|
+
"dockerfile": "Dockerfile"
|
|
26
|
+
},
|
|
27
|
+
"cmd": [
|
|
28
|
+
"/app/subset-assignment.R"
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
"tcr-disco": {
|
|
33
|
+
"binary": {
|
|
34
|
+
"artifact": {
|
|
35
|
+
"type": "R",
|
|
36
|
+
"registry": "platforma-open",
|
|
37
|
+
"environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
|
|
38
|
+
"root": "./src/tcr-disco"
|
|
39
|
+
},
|
|
40
|
+
"cmd": [
|
|
41
|
+
"Rscript",
|
|
42
|
+
"{pkg}/main.R"
|
|
43
|
+
]
|
|
44
|
+
},
|
|
45
|
+
"docker": {
|
|
46
|
+
"artifact": {
|
|
47
|
+
"type": "docker",
|
|
48
|
+
"context": "./src/tcr-disco",
|
|
49
|
+
"dockerfile": "Dockerfile"
|
|
50
|
+
},
|
|
51
|
+
"cmd": [
|
|
52
|
+
"/app/main.R"
|
|
53
|
+
]
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
"get-enriched-frequencies": {
|
|
57
|
+
"binary": {
|
|
58
|
+
"artifact": {
|
|
59
|
+
"type": "R",
|
|
60
|
+
"registry": "platforma-open",
|
|
61
|
+
"environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
|
|
62
|
+
"root": "./src/tcr-disco"
|
|
63
|
+
},
|
|
64
|
+
"cmd": [
|
|
65
|
+
"Rscript",
|
|
66
|
+
"{pkg}/get-enriched-frequencies.R"
|
|
67
|
+
]
|
|
68
|
+
},
|
|
69
|
+
"docker": {
|
|
70
|
+
"artifact": {
|
|
71
|
+
"type": "docker",
|
|
72
|
+
"context": "./src/tcr-disco",
|
|
73
|
+
"dockerfile": "Dockerfile"
|
|
74
|
+
},
|
|
75
|
+
"cmd": [
|
|
76
|
+
"/app/get-enriched-frequencies.R"
|
|
77
|
+
]
|
|
78
|
+
}
|
|
79
|
+
},
|
|
80
|
+
"tcr-ab-pairs": {
|
|
81
|
+
"binary": {
|
|
82
|
+
"artifact": {
|
|
83
|
+
"type": "R",
|
|
84
|
+
"registry": "platforma-open",
|
|
85
|
+
"environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
|
|
86
|
+
"root": "./src/tcr-disco"
|
|
87
|
+
},
|
|
88
|
+
"cmd": [
|
|
89
|
+
"Rscript",
|
|
90
|
+
"{pkg}/find-pairs.R"
|
|
91
|
+
]
|
|
92
|
+
},
|
|
93
|
+
"docker": {
|
|
94
|
+
"artifact": {
|
|
95
|
+
"type": "docker",
|
|
96
|
+
"context": "./src/tcr-disco",
|
|
97
|
+
"dockerfile": "Dockerfile"
|
|
98
|
+
},
|
|
99
|
+
"cmd": [
|
|
100
|
+
"/app/find-pairs.R"
|
|
101
|
+
]
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
},
|
|
106
|
+
"devDependencies": {
|
|
107
|
+
"@platforma-open/milaboratories.runenv-r-differential-expression": "^1.0.7",
|
|
108
|
+
"@platforma-sdk/package-builder": "^3.10.7"
|
|
109
|
+
},
|
|
110
|
+
"scripts": {
|
|
111
|
+
"build": "pl-pkg build",
|
|
112
|
+
"do-pack": "rm -f *.tgz && pl-pkg build && pnpm pack && mv platforma-open*.tgz package.tgz",
|
|
113
|
+
"changeset": "changeset",
|
|
114
|
+
"version-packages": "changeset version"
|
|
115
|
+
}
|
|
116
|
+
}
|
|
Binary file
|
|
Binary file
|
package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-tcr-ab-pairs-1.1.0.tgz
ADDED
|
Binary file
|
package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-tcr-disco-1.1.0.tgz
ADDED
|
Binary file
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
#!/usr/bin/env Rscript
|
|
2
|
+
|
|
3
|
+
# Load required libraries
|
|
4
|
+
suppressMessages(library("optparse"))
|
|
5
|
+
#----------------------------------------
|
|
6
|
+
|
|
7
|
+
# Required functions
|
|
8
|
+
# 1. generate fraction matrices
|
|
9
|
+
# Build a clonotype x sample matrix of summed fractions.
#
# Args:
#   main_table: data frame with columns `fraction`, `clonotypeKey` and
#     `useSampleId`. Rows with NA in any of those columns are ignored
#     (the same rows the formula interface of aggregate() would drop).
#
# Returns:
#   A numeric matrix with one row per clonotypeKey and one column per
#   useSampleId (both used as dimnames). Cells hold the sum of `fraction`
#   for that clonotype/sample combination and 0 where the combination never
#   occurs. A 0 x 0 numeric matrix is returned when there is nothing to
#   aggregate, instead of failing inside aggregate().
get_fraction_matrix <- function(main_table) {
  needed_cols <- c("fraction", "clonotypeKey", "useSampleId")
  missing_cols <- setdiff(needed_cols, colnames(main_table))
  if (length(missing_cols) > 0) {
    stop("main_table is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # aggregate() stops with "no rows to aggregate" on empty input, so bail out
  # early when no complete row is available.
  complete_rows <- !is.na(main_table[["fraction"]]) &
    !is.na(main_table[["clonotypeKey"]]) &
    !is.na(main_table[["useSampleId"]])
  if (!any(complete_rows)) {
    return(matrix(0, nrow = 0, ncol = 0))
  }

  # Sum fractions per clonotype/sample combination.
  aggregated <- aggregate(fraction ~ clonotypeKey + useSampleId,
                          data = main_table[complete_rows, , drop = FALSE],
                          FUN = sum)

  # Keys are used as dimnames, so handle them as character throughout.
  clonotype_keys <- as.character(aggregated$clonotypeKey)
  sample_ids <- as.character(aggregated$useSampleId)
  unique_clonotypes <- unique(clonotype_keys)
  unique_samples <- unique(sample_ids)

  # Start from an all-zero matrix so absent combinations read as 0.
  fraction_matrix <- matrix(0,
                            nrow = length(unique_clonotypes),
                            ncol = length(unique_samples),
                            dimnames = list(unique_clonotypes, unique_samples))

  # Two-column index matrix: one (row, col) pair per aggregated record.
  cell_index <- cbind(match(clonotype_keys, unique_clonotypes),
                      match(sample_ids, unique_samples))
  fraction_matrix[cell_index] <- aggregated$fraction

  fraction_matrix
}
|
|
37
|
+
|
|
38
|
+
# 2. find pairs
|
|
39
|
+
# Correlate alpha and beta clonotype fractions across the samples of one
# contrast level and return every candidate alpha/beta pair.
#
# Args:
#   deg_alpha_table, deg_beta_table: data frames with columns `Numerator`
#     and `clonotypeKey`.
#   metadata_table: data frame with column `useSampleId` and the column
#     named by `contrast_col`.
#   contrast_col: name of the metadata column holding the contrast levels.
#   alpha_matrix, beta_matrix: clonotype x sample fraction matrices with
#     clonotype keys as row names and sample ids as column names.
#   num: contrast level (numerator) to process.
#
# Returns:
#   A data frame with columns tra, trb, estimate, p.value, p.adj, Numerator;
#   one row per pair in which both chains are present or both absent in
#   every sample of the numerator. Zero rows (same columns) when no pair
#   qualifies.
find_pairs <- function(deg_alpha_table, deg_beta_table, metadata_table,
                       contrast_col, alpha_matrix, beta_matrix, num) {
  if (!"useSampleId" %in% colnames(metadata_table)) {
    stop("metadata_table must contain a 'useSampleId' column", call. = FALSE)
  }

  # Clonotypes listed for this numerator that also have a row in the matrix.
  # which() drops NA comparisons; as.character() prevents numeric keys from
  # being used as positional indices.
  keys_for <- function(deg_table, fraction_matrix) {
    listed <- deg_table[["clonotypeKey"]][which(deg_table[["Numerator"]] == num)]
    listed <- unique(as.character(listed))
    listed[listed %in% rownames(fraction_matrix)]
  }
  alpha_clonotypes <- keys_for(deg_alpha_table, alpha_matrix)
  beta_clonotypes <- keys_for(deg_beta_table, beta_matrix)

  # Samples belonging to the selected numerator.
  numerator_samples <-
    metadata_table[["useSampleId"]][which(metadata_table[[contrast_col]] == num)]
  sample_ids <- union(colnames(alpha_matrix), colnames(beta_matrix))
  sample_ids <- sample_ids[sample_ids %in% numerator_samples]

  # Sub-matrix restricted to `keys` x `sample_ids`, aligned by sample NAME.
  # A sample missing from one matrix means no clonotype of that chain was
  # seen there, i.e. fraction 0.
  align_samples <- function(fraction_matrix, keys) {
    aligned <- matrix(0,
                      nrow = length(keys), ncol = length(sample_ids),
                      dimnames = list(keys, sample_ids))
    shared <- sample_ids[sample_ids %in% colnames(fraction_matrix)]
    if (length(keys) > 0 && length(shared) > 0) {
      aligned[, shared] <- fraction_matrix[keys, shared, drop = FALSE]
    }
    aligned
  }
  alpha_fractions <- align_samples(alpha_matrix, alpha_clonotypes)
  beta_fractions <- align_samples(beta_matrix, beta_clonotypes)

  # All possible alpha-beta pairs (zero rows if either side is empty).
  pairs_grid <- expand.grid(
    tra = alpha_clonotypes,
    trb = beta_clonotypes,
    stringsAsFactors = FALSE
  )

  # cor.test() stops with fewer than 3 observations.
  enough_samples <- length(sample_ids) >= 3
  if (!enough_samples && nrow(pairs_grid) > 0) {
    warning("Fewer than 3 samples for numerator '", num,
            "'; no correlation can be computed", call. = FALSE)
  }

  # Run the correlation test for each pair; NULL marks a skipped pair.
  test_results <- lapply(seq_len(nrow(pairs_grid)), function(i) {
    tra_fraction <- alpha_fractions[pairs_grid$tra[i], ]
    trb_fraction <- beta_fractions[pairs_grid$trb[i], ]
    # Both chains must be present or both absent within every sample.
    co_occurring <- all((tra_fraction == 0) == (trb_fraction == 0))
    if (enough_samples && co_occurring) {
      cor.test(tra_fraction, trb_fraction)
    } else {
      NULL
    }
  })

  # vapply() keeps a stable type on empty input (sapply() returns list()).
  valid_indices <- !vapply(test_results, is.null, logical(1))
  predicted_pairs <- pairs_grid[valid_indices, , drop = FALSE]
  predicted_pairs$estimate <- vapply(test_results[valid_indices],
                                     function(x) unname(x$estimate), numeric(1))
  predicted_pairs$p.value <- vapply(test_results[valid_indices],
                                    function(x) x$p.value, numeric(1))

  # FDR adjustment; thresholding is left to the caller.
  predicted_pairs$p.adj <- p.adjust(predicted_pairs$p.value, method = "fdr")

  # Contrast column; rep() makes this safe for zero-row results.
  predicted_pairs$Numerator <- rep(num, nrow(predicted_pairs))

  predicted_pairs
}
|
|
89
|
+
|
|
90
|
+
#----------------------------------------
|
|
91
|
+
|
|
92
|
+
# Main code
|
|
93
|
+
|
|
94
|
+
# Parse command line arguments
|
|
95
|
+
# Command line interface. Every option keeps its flags, type, default and
# help text; the two local helpers only remove the repeated boilerplate.
option_list <- local({
  text_option <- function(flags, default, help) {
    make_option(flags,
      type = "character", default = default,
      help = help, metavar = "character"
    )
  }
  number_option <- function(flags, default, help) {
    make_option(flags, type = "double", default = default, help = help)
  }

  list(
    text_option("--main_alpha", "mainAlpha.tsv",
                "Path to main TCR alpha clonotypes TSV file"),
    text_option("--main_beta", "mainBeta.tsv",
                "Path to main TCR beta clonotypes TSV file"),
    text_option("--da_alpha", "daAlpha.csv", "Path to DA alpha CSV file"),
    text_option("--da_beta", "daBeta.csv", "Path to DA beta CSV file"),
    text_option("--metadata", "metadata.tsv", "Path to metadata TSV file"),
    text_option(c("-t", "--contrast_col"), NULL,
                "Column name in metadata for the contrast"),
    text_option("--sample_id_col", NULL,
                "Column name in metadata for the sample ID"),
    number_option(c("-f", "--fc_threshold"), 0.5,
                  "Log2(FC) threshold for significance"),
    number_option(c("-p", "--p_threshold"), 0.05,
                  "Adjusted p-value threshold for significance"),
    text_option(c("-o", "--output"), ".", "Output folder for TSV results")
  )
})

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# Unpack the parsed options into the variables used by the rest of the script
main_alpha <- opt[["main_alpha"]]
main_beta <- opt[["main_beta"]]
da_alpha <- opt[["da_alpha"]]
da_beta <- opt[["da_beta"]]
metadata <- opt[["metadata"]]
contrast_col <- opt[["contrast_col"]]
sample_id_col <- opt[["sample_id_col"]]
fc_cut <- opt[["fc_threshold"]]
fdr_cut <- opt[["p_threshold"]]
output_folder <- opt[["output"]]

# Manual overrides for local testing
# metadata <- "./metadata.tsv"
# main_alpha <- "./mainAlpha.tsv"
# main_beta <- "./mainBeta.tsv"
# da_alpha <- "./daAlpha.csv"
# da_beta <- "./daBeta.csv"
# contrast_col <- "ag"
# output_folder <- "./resultsPairing"
# sample_id_col <- "Barcode ID"

# Get from platforma
# @TODO: Filters are not yet script-specific, implement them separately for DA and pairing
# fdr_cut <- 0.05
# estimate_cut <- 0.95

## Control prints: echo every effective setting, one line each
settings <- list(
  "metadata file: " = metadata,
  "main alpha file: " = main_alpha,
  "main beta file: " = main_beta,
  "da alpha file: " = da_alpha,
  "da beta file: " = da_beta,
  "contrast column: " = contrast_col,
  "sample id column: " = sample_id_col,
  "fc threshold: " = fc_cut,
  "fdr threshold: " = fdr_cut,
  "output folder: " = output_folder
)
for (i in seq_along(settings)) {
  print(paste(names(settings)[i], settings[[i]]))
}
|
|
182
|
+
|
|
183
|
+
## 1.1. TCR Discovery
# Read the differential-abundance (DA) tables for both chains
deg_alpha_table <- read.csv(da_alpha, header = TRUE, sep = ",", stringsAsFactors = FALSE)
deg_beta_table <- read.csv(da_beta, header = TRUE, sep = ",", stringsAsFactors = FALSE)

if (nrow(deg_alpha_table) == 0 || nrow(deg_beta_table) == 0) {
  print("Warning: The DA alpha or beta tables are empty. No pairs will be found.")

  # Create an empty output table with all the required columns
  required_cols <- c("Numerator", "tra", "trb", "estimate", "p.value", "p.adj", "tra_CDR3aa", "tra_VGene", "trb_CDR3aa", "trb_VGene", "is_max_correlation")
  predicted_pairs_all <- data.frame(matrix(ncol = length(required_cols), nrow = 0))
  colnames(predicted_pairs_all) <- required_cols

} else {
  # Load metadata and main alpha/beta tables
  metadata_table <- read.table(metadata, header = TRUE, sep = "\t", stringsAsFactors = FALSE, check.names = FALSE)
  main_alpha_table <- read.table(main_alpha, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
  main_beta_table <- read.table(main_beta, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

  # By default every internal sample is its own sample. find_pairs() reads
  # metadata_table$useSampleId, so the metadata needs the same default as the
  # main tables (an existing useSampleId column is left untouched).
  main_alpha_table$useSampleId <- main_alpha_table$internalSampleId
  main_beta_table$useSampleId <- main_beta_table$internalSampleId
  if (!"useSampleId" %in% colnames(metadata_table)) {
    metadata_table$useSampleId <- metadata_table$internalSampleId
  }

  # If sample_id_col exists in the metadata and has fewer distinct values than
  # internalSampleId, use it as the sample id in all three tables.
  # sample_id_col is optional (NULL when not passed on the command line).
  if (!is.null(sample_id_col) && sample_id_col %in% colnames(metadata_table)) {
    if (length(unique(metadata_table[, sample_id_col])) < length(unique(metadata_table$internalSampleId))) {
      # Map each internal sample id to the original sample id
      alpha_meta_idx <- match(main_alpha_table$internalSampleId, metadata_table$internalSampleId)
      beta_meta_idx <- match(main_beta_table$internalSampleId, metadata_table$internalSampleId)
      main_alpha_table$useSampleId <- metadata_table[alpha_meta_idx, sample_id_col]
      main_beta_table$useSampleId <- metadata_table[beta_meta_idx, sample_id_col]

      metadata_table$useSampleId <- metadata_table[, sample_id_col]
    }
  }

  # Make sure we have the same set of samples to compare
  if (!identical(sort(unique(main_alpha_table$useSampleId)), sort(unique(main_beta_table$useSampleId)))) {
    stop("Error: The sets of samples in the alpha and beta tables are not the same")
  }

  # Filter main tables to only include DEG clonotypes to reduce memory and processing time.
  # We will lose samples here, so this must come after the check above.
  deg_alpha_clonotypes <- unique(deg_alpha_table$clonotypeKey)
  deg_beta_clonotypes <- unique(deg_beta_table$clonotypeKey)
  main_alpha_table <- main_alpha_table[main_alpha_table$clonotypeKey %in% deg_alpha_clonotypes, ]
  main_beta_table <- main_beta_table[main_beta_table$clonotypeKey %in% deg_beta_clonotypes, ]

  # Get alpha/beta fraction matrices
  alpha_matrix <- get_fraction_matrix(main_alpha_table)
  beta_matrix <- get_fraction_matrix(main_beta_table)

  # Get all possible values from Numerator column in both tables
  numerators <- unique(c(deg_alpha_table$Numerator, deg_beta_table$Numerator))

  # Find pairs per numerator and combine the results into one table
  # (collected in a list and bound once instead of growing with rbind()).
  pairs_per_numerator <- lapply(numerators, function(num) {
    find_pairs(deg_alpha_table, deg_beta_table, metadata_table,
               contrast_col, alpha_matrix, beta_matrix, num)
  })
  predicted_pairs_all <- do.call(rbind, pairs_per_numerator)

  # Filter out negative correlations. Undefined (NA) estimates are dropped as
  # well: indexing rows with NA would otherwise produce all-NA rows.
  keep_rows <- !is.na(predicted_pairs_all$estimate) & predicted_pairs_all$estimate >= 0
  predicted_pairs_all <- predicted_pairs_all[keep_rows, , drop = FALSE]

  # Add TRA and TRB CDR3 aa and VGene data
  ## Match tra column with alpha table
  alpha_match_idx <- match(predicted_pairs_all$tra, main_alpha_table$clonotypeKey)
  predicted_pairs_all$tra_CDR3aa <- main_alpha_table$CDR3aa[alpha_match_idx]
  predicted_pairs_all$tra_VGene <- main_alpha_table$VGene[alpha_match_idx]
  ## Match trb column with beta table
  beta_match_idx <- match(predicted_pairs_all$trb, main_beta_table$clonotypeKey)
  predicted_pairs_all$trb_CDR3aa <- main_beta_table$CDR3aa[beta_match_idx]
  predicted_pairs_all$trb_VGene <- main_beta_table$VGene[beta_match_idx]

  # Flag pairs whose correlation is the maximum for either their tra or their trb
  best_for_tra <- ave(predicted_pairs_all$estimate, predicted_pairs_all$tra, FUN = max)
  best_for_trb <- ave(predicted_pairs_all$estimate, predicted_pairs_all$trb, FUN = max)
  is_max <- (predicted_pairs_all$estimate == best_for_tra) |
    (predicted_pairs_all$estimate == best_for_trb)
  predicted_pairs_all$is_max_correlation <- ifelse(is_max, "max", "not_max")

  # Order by tra
  predicted_pairs_all <- predicted_pairs_all[order(predicted_pairs_all$tra), ]
}

# Save the predicted pairs in the output folder (created if missing, as the
# sibling get-enriched-frequencies.R script does)
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}
write.table(predicted_pairs_all, file.path(output_folder, "ab_pairs.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)
|
|
271
|
+
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#!/usr/bin/env Rscript
|
|
2
|
+
|
|
3
|
+
# Load required libraries
|
|
4
|
+
suppressMessages(library("optparse"))
|
|
5
|
+
#----------------------------------------
|
|
6
|
+
|
|
7
|
+
# Main code
|
|
8
|
+
|
|
9
|
+
# Parse command line arguments
|
|
10
|
+
option_list <- list(
  make_option(c("--main_alpha"),
    type = "character", default = "mainAlpha.tsv",
    help = "Path to main TCR alpha clonotypes TSV file", metavar = "character"
  ),
  make_option(c("--main_beta"),
    type = "character", default = "mainBeta.tsv",
    help = "Path to main TCR beta clonotypes TSV file", metavar = "character"
  ),
  make_option(c("--da_alpha"),
    type = "character", default = "daAlpha.csv",
    help = "Path to DA alpha CSV file", metavar = "character"
  ),
  make_option(c("--da_beta"),
    type = "character", default = "daBeta.csv",
    help = "Path to DA beta CSV file", metavar = "character"
  ),
  make_option(c("-o", "--output"),
    type = "character",
    default = ".",
    help = "Output folder for TSV results", metavar = "character"
  )
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# Get input data
main_alpha <- opt$main_alpha
main_beta <- opt$main_beta
da_alpha <- opt$da_alpha
da_beta <- opt$da_beta
output_folder <- opt$output

# test
# main_alpha <- "./mainAlpha.tsv"
# main_beta <- "./mainBeta.tsv"
# da_alpha <- "./DA_alpha.csv"
# da_beta <- "./DA_beta.csv"
# output_folder <- "./resultsPairing"


## 1.1. TCR Discovery
# Load main alpha/beta clonotype tables
main_alpha_table <- read.table(main_alpha, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
main_beta_table <- read.table(main_beta, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Get alpha/beta DEG tables
deg_alpha_table <- read.csv(da_alpha, header = TRUE, sep = ",", stringsAsFactors = FALSE)
deg_beta_table <- read.csv(da_beta, header = TRUE, sep = ",", stringsAsFactors = FALSE)

# Keep only DA clonotypes from main tables
main_alpha_table <- main_alpha_table[main_alpha_table$clonotypeKey %in% unique(deg_alpha_table$clonotypeKey), ]
main_beta_table <- main_beta_table[main_beta_table$clonotypeKey %in% unique(deg_beta_table$clonotypeKey), ]

# Store the tables
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}

# The optional "subset" column is exported only when BOTH tables carry it;
# checking the alpha table alone would fail on a beta table without it.
has_subset <- "subset" %in% colnames(main_alpha_table) &&
  "subset" %in% colnames(main_beta_table)

frequency_cols <- c("internalSampleId", "clonotypeKey", "fraction")
if (has_subset) {
  frequency_cols <- c(frequency_cols, "subset")
}

write.table(main_alpha_table[, frequency_cols, drop = FALSE],
            paste0(output_folder, "/main_alpha_frequencies.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)
write.table(main_beta_table[, frequency_cols, drop = FALSE],
            paste0(output_folder, "/main_beta_frequencies.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)

if (has_subset) {
  # Clonotype to subset mapping with repeated lines removed
  clonotype_to_subset_alpha <- unique(main_alpha_table[, c("clonotypeKey", "subset")])
  clonotype_to_subset_beta <- unique(main_beta_table[, c("clonotypeKey", "subset")])
} else {
  # No subset information: emit header-only tables so both files always exist
  clonotype_to_subset_alpha <- data.frame(clonotypeKey = character(), subset = character())
  clonotype_to_subset_beta <- data.frame(clonotypeKey = character(), subset = character())
}

write.table(clonotype_to_subset_alpha, paste0(output_folder, "/clonotype_to_subset_alpha.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)
write.table(clonotype_to_subset_beta, paste0(output_folder, "/clonotype_to_subset_beta.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)
|