@platforma-open/milaboratories.run-tcrdisco-enrichment.software 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/.turbo/turbo-build.log +2183 -0
  2. package/CHANGELOG.md +7 -0
  3. package/Dockerfile +56 -0
  4. package/dist/artifacts/get-enriched-frequencies/archive.json +1 -0
  5. package/dist/artifacts/get-enriched-frequencies/docker_x64.json +1 -0
  6. package/dist/artifacts/subset-assignment/archive.json +1 -0
  7. package/dist/artifacts/subset-assignment/docker_x64.json +1 -0
  8. package/dist/artifacts/tcr-ab-pairs/archive.json +1 -0
  9. package/dist/artifacts/tcr-ab-pairs/docker_x64.json +1 -0
  10. package/dist/artifacts/tcr-disco/archive.json +1 -0
  11. package/dist/artifacts/tcr-disco/docker_x64.json +1 -0
  12. package/dist/tengo/software/get-enriched-frequencies.sw.json +1 -0
  13. package/dist/tengo/software/subset-assignment.sw.json +1 -0
  14. package/dist/tengo/software/tcr-ab-pairs.sw.json +1 -0
  15. package/dist/tengo/software/tcr-disco.sw.json +1 -0
  16. package/package.json +116 -0
  17. package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-get-enriched-frequencies-1.1.0.tgz +0 -0
  18. package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-subset-assignment-1.1.0.tgz +0 -0
  19. package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-tcr-ab-pairs-1.1.0.tgz +0 -0
  20. package/pkg-platforma-open-milaboratories.run-tcrdisco-enrichment.software-tcr-disco-1.1.0.tgz +0 -0
  21. package/src/tcr-disco/find-pairs.R +271 -0
  22. package/src/tcr-disco/get-enriched-frequencies.R +102 -0
  23. package/src/tcr-disco/main.R +291 -0
  24. package/src/tcr-disco/renv.lock +6375 -0
  25. package/src/tcr-disco/subset-assignment.R +194 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,7 @@
1
+ # @platforma-open/milaboratories.run-tcrdisco-enrichment.software
2
+
3
+ ## 1.1.0
4
+
5
+ ### Minor Changes
6
+
7
+ - ede2472: First block version
package/Dockerfile ADDED
@@ -0,0 +1,56 @@
1
# Image for the TCR-Disco R scripts: r-base 4.4.2 plus the packages pinned in renv.lock.
FROM r-base:4.4.2

ENV R_VERSION=4.4.2
ENV BIOCONDUCTOR_VERSION=3.20
# NOTE(review): RENV_VERSION is declared here, but the install step below does not
# reference it and installs whatever renv version the repository serves - confirm intent.
ENV RENV_VERSION=1.0.11
ENV RENV_CONFIG_PPM_ENABLED=false
ENV RENV_CONFIG_REPOS_OVERRIDE=https://cloud.r-project.org

# Install system dependencies required for R packages
RUN apt-get update && apt-get install -y \
    libcurl4-openssl-dev \
    libssl-dev \
    libxml2-dev \
    libcairo2-dev \
    libgit2-dev \
    default-libmysqlclient-dev \
    libpq-dev \
    libsasl2-dev \
    libsqlite3-dev \
    libssh2-1-dev \
    unixodbc-dev \
    libharfbuzz-dev \
    libfribidi-dev \
    libfreetype6-dev \
    libpng-dev \
    libtiff5-dev \
    libjpeg-dev \
    libgsl-dev \
    libbz2-dev \
    liblzma-dev \
    libpcre2-dev \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

RUN R -e "install.packages('renv', repos = 'https://cloud.r-project.org')"

# .Rprofile breaks renv restore
# Copy only files that are needed.
# The destination ends with "/" because the wildcard can expand to several sources.
COPY *.R ./
COPY ./renv.lock ./renv.lock

# Have to use absolute paths because during execution the working directory will be changed
# and realpath command does not work
ENV RENV_PATHS_RENV=/app/renv
ENV RENV_PATHS_LOCKFILE=/app/renv.lock

RUN R --no-echo -e "renv::restore(clean = TRUE)"

# Generate the entrypoint wrapper. printf writes one line per argument, so the
# result does not depend on whether the shell's echo interprets "\n" escapes.
RUN printf '%s\n' \
      '#!/bin/bash' \
      'R --no-echo --no-restore -e "renv::restore()"' \
      '"$@"' \
      > /app/run.sh \
    && chmod +x /app/run.sh

# The wrapper runs renv::restore() and then executes the command passed to the container
ENTRYPOINT ["/app/run.sh"]
@@ -0,0 +1 @@
1
+ {"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/get-enriched-frequencies/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/get-enriched-frequencies/1.1.0.tgz"}
@@ -0,0 +1 @@
1
+ {"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.get-enriched-frequencies.c8e04e155280"}
@@ -0,0 +1 @@
1
+ {"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/subset-assignment/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/subset-assignment/1.1.0.tgz"}
@@ -0,0 +1 @@
1
+ {"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.subset-assignment.eb67f96e9c9a"}
@@ -0,0 +1 @@
1
+ {"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-ab-pairs/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-ab-pairs/1.1.0.tgz"}
@@ -0,0 +1 @@
1
+ {"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-ab-pairs.2e1cbb240fae"}
@@ -0,0 +1 @@
1
+ {"type":"R","platform":"linux-x64","registryURL":"https://bin.pl-open.science/","registryName":"platforma-open","remoteArtifactLocation":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-disco/1.1.0.tgz","uploadPath":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-disco/1.1.0.tgz"}
@@ -0,0 +1 @@
1
+ {"type":"docker","platform":"linux-x64","remoteArtifactLocation":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-disco.af738a23e37d"}
@@ -0,0 +1 @@
1
+ {"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:get-enriched-frequencies","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.get-enriched-frequencies.c8e04e155280","entrypoint":[],"cmd":["/app/get-enriched-frequencies.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/get-enriched-frequencies/1.1.0.tgz","cmd":["Rscript","{pkg}/get-enriched-frequencies.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
@@ -0,0 +1 @@
1
+ {"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:subset-assignment","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.subset-assignment.eb67f96e9c9a","entrypoint":[],"cmd":["/app/main.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/subset-assignment/1.1.0.tgz","cmd":["Rscript","{pkg}/subset-assignment.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
@@ -0,0 +1 @@
1
+ {"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:tcr-ab-pairs","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-ab-pairs.2e1cbb240fae","entrypoint":[],"cmd":["/app/find-pairs.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-ab-pairs/1.1.0.tgz","cmd":["Rscript","{pkg}/find-pairs.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
@@ -0,0 +1 @@
1
+ {"name":"@platforma-open/milaboratories.run-tcrdisco-enrichment.software:tcr-disco","docker":{"tag":"containers.pl-open.science/milaboratories/pl-containers:platforma-open.milaboratories.run-tcrdisco-enrichment.software.tcr-disco.af738a23e37d","entrypoint":[],"cmd":["/app/main.R"],"pkg":"/app"},"binary":{"type":"R","registry":"platforma-open","package":"software/platforma-open/milaboratories.run-tcrdisco-enrichment.software/tcr-disco/1.1.0.tgz","cmd":["Rscript","{pkg}/main.R"],"envVars":[],"runEnv":{"name":"@platforma-open/milaboratories.runenv-r-differential-expression:main","type":"R","registry":"platforma-open","package":"platforma-open/milaboratories.runenv-r-differential-expression/main/1.0.7-{os}-{arch}.tgz","binDir":"bin","r-version":""},"toolset":"renv","dependencies":{}}}
package/package.json ADDED
@@ -0,0 +1,116 @@
1
+ {
2
+ "name": "@platforma-open/milaboratories.run-tcrdisco-enrichment.software",
3
+ "version": "1.1.0",
4
+ "type": "module",
5
+ "description": "Block Software: Run TCR Disco with R",
6
+ "block-software": {
7
+ "entrypoints": {
8
+ "subset-assignment": {
9
+ "binary": {
10
+ "artifact": {
11
+ "type": "R",
12
+ "registry": "platforma-open",
13
+ "environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
14
+ "root": "./src/tcr-disco"
15
+ },
16
+ "cmd": [
17
+ "Rscript",
18
+ "{pkg}/subset-assignment.R"
19
+ ]
20
+ },
21
+ "docker": {
22
+ "artifact": {
23
+ "type": "docker",
24
+ "context": "./src/tcr-disco",
25
+ "dockerfile": "Dockerfile"
26
+ },
27
+ "cmd": [
28
+ "/app/main.R"
29
+ ]
30
+ }
31
+ },
32
+ "tcr-disco": {
33
+ "binary": {
34
+ "artifact": {
35
+ "type": "R",
36
+ "registry": "platforma-open",
37
+ "environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
38
+ "root": "./src/tcr-disco"
39
+ },
40
+ "cmd": [
41
+ "Rscript",
42
+ "{pkg}/main.R"
43
+ ]
44
+ },
45
+ "docker": {
46
+ "artifact": {
47
+ "type": "docker",
48
+ "context": "./src/tcr-disco",
49
+ "dockerfile": "Dockerfile"
50
+ },
51
+ "cmd": [
52
+ "/app/main.R"
53
+ ]
54
+ }
55
+ },
56
+ "get-enriched-frequencies": {
57
+ "binary": {
58
+ "artifact": {
59
+ "type": "R",
60
+ "registry": "platforma-open",
61
+ "environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
62
+ "root": "./src/tcr-disco"
63
+ },
64
+ "cmd": [
65
+ "Rscript",
66
+ "{pkg}/get-enriched-frequencies.R"
67
+ ]
68
+ },
69
+ "docker": {
70
+ "artifact": {
71
+ "type": "docker",
72
+ "context": "./src/tcr-disco",
73
+ "dockerfile": "Dockerfile"
74
+ },
75
+ "cmd": [
76
+ "/app/get-enriched-frequencies.R"
77
+ ]
78
+ }
79
+ },
80
+ "tcr-ab-pairs": {
81
+ "binary": {
82
+ "artifact": {
83
+ "type": "R",
84
+ "registry": "platforma-open",
85
+ "environment": "@platforma-open/milaboratories.runenv-r-differential-expression:main",
86
+ "root": "./src/tcr-disco"
87
+ },
88
+ "cmd": [
89
+ "Rscript",
90
+ "{pkg}/find-pairs.R"
91
+ ]
92
+ },
93
+ "docker": {
94
+ "artifact": {
95
+ "type": "docker",
96
+ "context": "./src/tcr-disco",
97
+ "dockerfile": "Dockerfile"
98
+ },
99
+ "cmd": [
100
+ "/app/find-pairs.R"
101
+ ]
102
+ }
103
+ }
104
+ }
105
+ },
106
+ "devDependencies": {
107
+ "@platforma-open/milaboratories.runenv-r-differential-expression": "^1.0.7",
108
+ "@platforma-sdk/package-builder": "^3.10.7"
109
+ },
110
+ "scripts": {
111
+ "build": "pl-pkg build",
112
+ "do-pack": "rm -f *.tgz && pl-pkg build && pnpm pack && mv platforma-open*.tgz package.tgz",
113
+ "changeset": "changeset",
114
+ "version-packages": "changeset version"
115
+ }
116
+ }
@@ -0,0 +1,271 @@
1
+ #!/usr/bin/env Rscript
2
+
3
+ # Load required libraries
4
+ suppressMessages(library("optparse"))
5
+ #----------------------------------------
6
+
7
+ # Required functions
8
+ # 1. generate fraction matrices
9
#' Build a clonotype-by-sample matrix of summed fractions.
#'
#' @param main_table Data frame with `clonotypeKey`, `useSampleId` and
#'   `fraction` columns.
#' @return Numeric matrix with clonotype keys as row names and sample IDs as
#'   column names. Each cell holds the sum of `fraction` for that clonotype in
#'   that sample; combinations absent from `main_table` are 0. A table without
#'   usable rows yields a 0 x 0 matrix.
get_fraction_matrix <- function(main_table) {
  # The formula interface of aggregate() ignores rows with a missing key,
  # sample or fraction. Dropping them up front gives the same sums and lets us
  # catch the "nothing left" case, on which aggregate() would stop with
  # "no rows to aggregate".
  needed <- c("clonotypeKey", "useSampleId", "fraction")
  usable <- stats::complete.cases(main_table[, needed])
  main_table <- main_table[usable, , drop = FALSE]
  if (nrow(main_table) == 0) {
    return(matrix(numeric(0), nrow = 0, ncol = 0))
  }

  # Sum fractions per (clonotype, sample) combination
  aggregated <- aggregate(fraction ~ clonotypeKey + useSampleId,
                          data = main_table, FUN = sum)

  # Row / column labels in the order they first appear in the aggregated table
  unique_clonotypes <- unique(aggregated$clonotypeKey)
  unique_samples <- unique(aggregated$useSampleId)

  # Start from an all-zero matrix so unobserved combinations stay 0
  fraction_matrix <- matrix(
    0,
    nrow = length(unique_clonotypes),
    ncol = length(unique_samples),
    dimnames = list(as.character(unique_clonotypes),
                    as.character(unique_samples))
  )

  # A two-column index matrix fills all observed cells in one assignment
  row_indices <- match(aggregated$clonotypeKey, unique_clonotypes)
  col_indices <- match(aggregated$useSampleId, unique_samples)
  fraction_matrix[cbind(row_indices, col_indices)] <- aggregated$fraction

  fraction_matrix
}
37
+
38
+ # 2. find pairs
39
#' Correlate alpha and beta clonotype fractions across one numerator's samples.
#'
#' Every alpha clonotype listed for `num` is tested against every beta
#' clonotype listed for `num`. A pair is tested only when, in each sample, both
#' chains are present or both are absent (fraction 0 counts as absent).
#' No FDR / correlation threshold is applied here; filtering is left to the
#' caller.
#'
#' @param deg_alpha_table,deg_beta_table Data frames with `clonotypeKey` and
#'   `Numerator` columns.
#' @param metadata_table Data frame with a `useSampleId` column and the
#'   column named by `contrast_col`.
#' @param contrast_col Name of the metadata column compared against `num`.
#' @param alpha_matrix,beta_matrix Clonotype-by-sample fraction matrices with
#'   clonotype keys as row names and sample IDs as column names.
#' @param num Numerator value to process.
#' @return Data frame with columns `tra`, `trb`, `estimate`, `p.value`,
#'   `p.adj` and `Numerator`; zero rows when no pair could be tested.
find_pairs <- function(deg_alpha_table, deg_beta_table, metadata_table,
                       contrast_col, alpha_matrix, beta_matrix, num) {
  if (!"useSampleId" %in% colnames(metadata_table)) {
    stop("metadata_table must contain a 'useSampleId' column", call. = FALSE)
  }

  # which() drops NA comparisons so they cannot inject NA keys or samples
  alpha_keys <- unique(as.character(
    deg_alpha_table$clonotypeKey[which(deg_alpha_table$Numerator == num)]
  ))
  beta_keys <- unique(as.character(
    deg_beta_table$clonotypeKey[which(deg_beta_table$Numerator == num)]
  ))
  numerator_samples <- unique(as.character(
    metadata_table[["useSampleId"]][which(metadata_table[[contrast_col]] == num)]
  ))

  # Only clonotypes that have a row in the fraction matrices can be tested
  alpha_keys <- alpha_keys[alpha_keys %in% rownames(alpha_matrix)]
  beta_keys <- beta_keys[beta_keys %in% rownames(beta_matrix)]

  # Samples of this numerator that occur in at least one of the matrices
  known_samples <- union(colnames(alpha_matrix), colnames(beta_matrix))
  samples <- numerator_samples[numerator_samples %in% known_samples]

  # Bring both matrices onto the same, identically ordered sample columns.
  # The two matrices may cover different samples or order them differently, and
  # combining them by position would pair fractions from different samples.
  # A sample missing from one matrix means no fraction was recorded there: 0.
  align_to_samples <- function(mat, keys) {
    aligned <- matrix(0, nrow = length(keys), ncol = length(samples),
                      dimnames = list(keys, samples))
    present <- samples[samples %in% colnames(mat)]
    if (length(keys) > 0 && length(present) > 0) {
      aligned[keys, present] <- mat[keys, present, drop = FALSE]
    }
    aligned
  }
  alpha_fractions <- align_to_samples(alpha_matrix, alpha_keys)
  beta_fractions <- align_to_samples(beta_matrix, beta_keys)

  # All candidate alpha-beta pairs (zero rows if either side is empty)
  pairs_grid <- expand.grid(
    tra = alpha_keys,
    trb = beta_keys,
    stringsAsFactors = FALSE
  )

  # cor.test() needs at least 3 observations; with fewer, nothing is testable
  if (nrow(pairs_grid) > 0 && length(samples) < 3) {
    warning("Fewer than 3 samples for numerator '", num,
            "'; no pairs tested", call. = FALSE)
    pairs_grid <- pairs_grid[0, , drop = FALSE]
  }

  # Run the correlation test for each pair; NULL marks a skipped pair
  test_results <- Map(function(tra, trb) {
    alpha_values <- alpha_fractions[tra, ]
    beta_values <- beta_fractions[trb, ]
    # Both chains present or both absent in every sample
    if (all((alpha_values == 0) == (beta_values == 0))) {
      cor.test(alpha_values, beta_values)
    } else {
      NULL
    }
  }, pairs_grid$tra, pairs_grid$trb)

  # vapply keeps the result types stable even when there is nothing to test
  was_tested <- !unname(vapply(test_results, is.null, logical(1)))
  tested <- test_results[was_tested]

  predicted_pairs <- pairs_grid[was_tested, , drop = FALSE]
  predicted_pairs$estimate <- vapply(
    tested, function(res) unname(res$estimate), numeric(1), USE.NAMES = FALSE
  )
  predicted_pairs$p.value <- vapply(
    tested, function(res) res$p.value, numeric(1), USE.NAMES = FALSE
  )

  # FDR adjustment over the pairs tested for this numerator
  predicted_pairs$p.adj <- p.adjust(predicted_pairs$p.value, method = "fdr")

  # Add contrast column; rep() also works for a zero-row result
  predicted_pairs$Numerator <- rep(num, nrow(predicted_pairs))

  predicted_pairs
}
89
+
90
+ #----------------------------------------
91
+
92
+ # Main code
93
+
94
+ # Parse command line arguments
95
# Command-line interface of the pairing script.
# path_option() builds the repeated "character option holding a path" entries.
path_option <- function(flag, default, help) {
  make_option(c(flag),
    type = "character", default = default,
    help = help, metavar = "character"
  )
}

option_list <- list(
  path_option("--main_alpha", "mainAlpha.tsv",
              "Path to main TCR alpha clonotypes TSV file"),
  path_option("--main_beta", "mainBeta.tsv",
              "Path to main TCR beta clonotypes TSV file"),
  path_option("--da_alpha", "daAlpha.csv", "Path to DA alpha CSV file"),
  path_option("--da_beta", "daBeta.csv", "Path to DA beta CSV file"),
  path_option("--metadata", "metadata.tsv", "Path to metadata TSV file"),
  make_option(c("-t", "--contrast_col"),
    type = "character", default = NULL,
    help = "Column name in metadata for the contrast",
    metavar = "character"
  ),
  make_option(c("--sample_id_col"),
    type = "character", default = NULL,
    help = "Column name in metadata for the sample ID",
    metavar = "character"
  ),
  make_option(c("-f", "--fc_threshold"),
    type = "double", default = 0.5,
    help = "Log2(FC) threshold for significance"
  ),
  make_option(c("-p", "--p_threshold"),
    type = "double", default = 0.05,
    help = "Adjusted p-value threshold for significance"
  ),
  make_option(c("-o", "--output"),
    type = "character", default = ".",
    help = "Output folder for TSV results", metavar = "character"
  )
)

opt <- parse_args(OptionParser(option_list = option_list))

# Unpack the parsed options into the variables used by the rest of the script
main_alpha    <- opt$main_alpha
main_beta     <- opt$main_beta
da_alpha      <- opt$da_alpha
da_beta       <- opt$da_beta
metadata      <- opt$metadata
contrast_col  <- opt$contrast_col
sample_id_col <- opt$sample_id_col
fc_cut        <- opt$fc_threshold
fdr_cut       <- opt$p_threshold
output_folder <- opt$output

# Values for a manual test run (kept disabled)
# metadata <- "./metadata.tsv"
# main_alpha <- "./mainAlpha.tsv"
# main_beta <- "./mainBeta.tsv"
# da_alpha <- "./daAlpha.csv"
# da_beta <- "./daBeta.csv"
# contrast_col <- "ag"
# output_folder <- "./resultsPairing"
# sample_id_col <- "Barcode ID"

# Get from platforma
# @TODO: Filters are not yet script-specific, implement them separately for DA and pairing
# fdr_cut <- 0.05
# estimate_cut <- 0.95

## Control prints: echo every setting, one line each, in a fixed order
control_values <- list(
  "metadata file: " = metadata,
  "main alpha file: " = main_alpha,
  "main beta file: " = main_beta,
  "da alpha file: " = da_alpha,
  "da beta file: " = da_beta,
  "contrast column: " = contrast_col,
  "sample id column: " = sample_id_col,
  "fc threshold: " = fc_cut,
  "fdr threshold: " = fdr_cut,
  "output folder: " = output_folder
)
for (label in names(control_values)) {
  print(paste(label, control_values[[label]]))
}
182
+
183
## 1.1. TCR Discovery
# Read the differential-abundance (DA) tables for both chains
deg_alpha_table <- read.csv(da_alpha, header = TRUE, sep = ",", stringsAsFactors = FALSE)
deg_beta_table <- read.csv(da_beta, header = TRUE, sep = ",", stringsAsFactors = FALSE)

if (nrow(deg_alpha_table) == 0 || nrow(deg_beta_table) == 0) {
  print("Warning: The DA alpha or beta tables are empty. No pairs will be found.")

  # Create an empty output table with all the required columns
  required_cols <- c("Numerator", "tra", "trb", "estimate", "p.value", "p.adj",
                     "tra_CDR3aa", "tra_VGene", "trb_CDR3aa", "trb_VGene",
                     "is_max_correlation")
  predicted_pairs_all <- data.frame(matrix(ncol = length(required_cols), nrow = 0))
  colnames(predicted_pairs_all) <- required_cols

} else {
  # Load metadata and main alpha/beta tables
  metadata_table <- read.table(metadata, header = TRUE, sep = "\t",
                               stringsAsFactors = FALSE, check.names = FALSE)
  main_alpha_table <- read.table(main_alpha, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
  main_beta_table <- read.table(main_beta, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

  # By default samples are identified by internalSampleId in all three tables.
  # The metadata table needs the column too, because find_pairs() reads
  # metadata_table$useSampleId.
  main_alpha_table$useSampleId <- main_alpha_table$internalSampleId
  main_beta_table$useSampleId <- main_beta_table$internalSampleId
  metadata_table$useSampleId <- metadata_table$internalSampleId

  # Switch to the user-selected sample ID column only when it exists in the
  # metadata and has fewer distinct values than internalSampleId.
  # is.null() guards the case where --sample_id_col was not given.
  use_custom_ids <- !is.null(sample_id_col) &&
    sample_id_col %in% colnames(metadata_table) &&
    length(unique(metadata_table[[sample_id_col]])) <
      length(unique(metadata_table$internalSampleId))
  if (use_custom_ids) {
    # Map each internalSampleId to its value in the selected column
    alpha_rows <- match(main_alpha_table$internalSampleId, metadata_table$internalSampleId)
    beta_rows <- match(main_beta_table$internalSampleId, metadata_table$internalSampleId)
    main_alpha_table$useSampleId <- metadata_table[alpha_rows, sample_id_col]
    main_beta_table$useSampleId <- metadata_table[beta_rows, sample_id_col]
    metadata_table$useSampleId <- metadata_table[[sample_id_col]]
  }

  # Make sure we have the same set of samples to compare
  if (!identical(sort(unique(main_alpha_table$useSampleId)),
                 sort(unique(main_beta_table$useSampleId)))) {
    stop("Error: The sets of samples in the alpha and beta tables are not the same")
  }

  # Filter main tables to only include DEG clonotypes to reduce memory and processing time.
  # Samples may be lost here, so this has to come after the check above.
  deg_alpha_clonotypes <- unique(deg_alpha_table$clonotypeKey)
  deg_beta_clonotypes <- unique(deg_beta_table$clonotypeKey)
  main_alpha_table <- main_alpha_table[main_alpha_table$clonotypeKey %in% deg_alpha_clonotypes, ]
  main_beta_table <- main_beta_table[main_beta_table$clonotypeKey %in% deg_beta_clonotypes, ]

  # Get alpha/beta fraction matrices
  alpha_matrix <- get_fraction_matrix(main_alpha_table)
  beta_matrix <- get_fraction_matrix(main_beta_table)

  # Get all possible values from Numerator column in both tables
  numerators <- unique(c(deg_alpha_table$Numerator, deg_beta_table$Numerator))

  # Find pairs per numerator and combine the results into one table
  # (collected in a list and bound once instead of growing a data frame)
  pairs_per_numerator <- lapply(numerators, function(num) {
    find_pairs(deg_alpha_table, deg_beta_table, metadata_table,
               contrast_col, alpha_matrix, beta_matrix, num)
  })
  predicted_pairs_all <- do.call(rbind, pairs_per_numerator)

  # Filter out negative correlations. Undefined (NA) estimates are dropped as
  # well; a bare `>= 0` subset would turn them into all-NA rows.
  keep <- !is.na(predicted_pairs_all$estimate) & predicted_pairs_all$estimate >= 0
  predicted_pairs_all <- predicted_pairs_all[keep, , drop = FALSE]

  # Add TRA and TRB CDR3 aa and VGene data
  ## Match tra column with alpha table
  alpha_match_idx <- match(predicted_pairs_all$tra, main_alpha_table$clonotypeKey)
  predicted_pairs_all$tra_CDR3aa <- main_alpha_table$CDR3aa[alpha_match_idx]
  predicted_pairs_all$tra_VGene <- main_alpha_table$VGene[alpha_match_idx]
  ## Match trb column with beta table
  beta_match_idx <- match(predicted_pairs_all$trb, main_beta_table$clonotypeKey)
  predicted_pairs_all$trb_CDR3aa <- main_beta_table$CDR3aa[beta_match_idx]
  predicted_pairs_all$trb_VGene <- main_beta_table$VGene[beta_match_idx]

  # Flag pairs whose correlation is the maximum for either their tra or their trb
  best_for_tra <- ave(predicted_pairs_all$estimate, predicted_pairs_all$tra, FUN = max)
  best_for_trb <- ave(predicted_pairs_all$estimate, predicted_pairs_all$trb, FUN = max)
  is_max <- predicted_pairs_all$estimate == best_for_tra |
    predicted_pairs_all$estimate == best_for_trb
  max_label <- rep("not_max", nrow(predicted_pairs_all))
  max_label[is_max] <- "max"
  predicted_pairs_all$is_max_correlation <- max_label

  # Order by tra
  predicted_pairs_all <- predicted_pairs_all[order(predicted_pairs_all$tra), ]
}

# Write the predicted alpha-beta pairs to <output_folder>/ab_pairs.tsv,
# creating the folder first if needed
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}
write.table(predicted_pairs_all, paste0(output_folder, "/ab_pairs.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)
271
+
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env Rscript
2
+
3
+ # Load required libraries
4
+ suppressMessages(library("optparse"))
5
+ #----------------------------------------
6
+
7
+ # Main code
8
+
9
+ # Parse command line arguments
10
# Command-line options: four input paths plus the output folder.
# Each entry below is (flag, default path, help text).
input_path_specs <- list(
  list("--main_alpha", "mainAlpha.tsv", "Path to main TCR alpha clonotypes TSV file"),
  list("--main_beta", "mainBeta.tsv", "Path to main TCR beta clonotypes TSV file"),
  list("--da_alpha", "daAlpha.csv", "Path to DA alpha CSV file"),
  list("--da_beta", "daBeta.csv", "Path to DA beta CSV file")
)
option_list <- lapply(input_path_specs, function(spec) {
  make_option(c(spec[[1]]),
    type = "character", default = spec[[2]],
    help = spec[[3]], metavar = "character"
  )
})
option_list <- c(option_list, list(
  make_option(c("-o", "--output"),
    type = "character", default = ".",
    help = "Output folder for TSV results", metavar = "character"
  )
))

opt <- parse_args(OptionParser(option_list = option_list))

# Unpack the parsed options
main_alpha    <- opt$main_alpha
main_beta     <- opt$main_beta
da_alpha      <- opt$da_alpha
da_beta       <- opt$da_beta
output_folder <- opt$output

# Values for a manual test run (kept disabled)
# main_alpha <- "./mainAlpha.tsv"
# main_beta <- "./mainBeta.tsv"
# da_alpha <- "./DA_alpha.csv"
# da_beta <- "./DA_beta.csv"
# output_folder <- "./resultsPairing"


## 1.1. TCR Discovery
# Load the main alpha/beta clonotype tables
main_alpha_table <- read.table(main_alpha, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
main_beta_table <- read.table(main_beta, header = TRUE, sep = "\t", stringsAsFactors = FALSE)

# Load the alpha/beta differential-abundance (DA) tables
deg_alpha_table <- read.csv(da_alpha, header = TRUE, sep = ",", stringsAsFactors = FALSE)
deg_beta_table <- read.csv(da_beta, header = TRUE, sep = ",", stringsAsFactors = FALSE)

# Keep only the rows of the main tables whose clonotype appears in the DA tables
alpha_is_da <- main_alpha_table$clonotypeKey %in% unique(deg_alpha_table$clonotypeKey)
beta_is_da <- main_beta_table$clonotypeKey %in% unique(deg_beta_table$clonotypeKey)
main_alpha_table <- main_alpha_table[alpha_is_da, ]
main_beta_table <- main_beta_table[beta_is_da, ]

# Make sure the output folder exists before writing
if (!dir.exists(output_folder)) {
  dir.create(output_folder, recursive = TRUE)
}

# The alpha table decides whether the optional "subset" column is exported
# (for both chains)
has_subset <- "subset" %in% colnames(main_alpha_table)
frequency_cols <- c("internalSampleId", "clonotypeKey", "fraction",
                    if (has_subset) "subset")

# Per-sample frequencies of the DA clonotypes
write.table(main_alpha_table[, frequency_cols],
            paste0(output_folder, "/main_alpha_frequencies.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)
write.table(main_beta_table[, frequency_cols],
            paste0(output_folder, "/main_beta_frequencies.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)

# Clonotype-to-subset mapping: de-duplicated pairs when "subset" is available,
# otherwise an empty table with the same two columns
if (has_subset) {
  clonotype_to_subset_alpha <- unique(main_alpha_table[, c("clonotypeKey", "subset")])
  clonotype_to_subset_beta <- unique(main_beta_table[, c("clonotypeKey", "subset")])
} else {
  clonotype_to_subset_alpha <- data.frame(clonotypeKey = character(), subset = character())
  clonotype_to_subset_beta <- data.frame(clonotypeKey = character(), subset = character())
}
write.table(clonotype_to_subset_alpha,
            paste0(output_folder, "/clonotype_to_subset_alpha.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)
write.table(clonotype_to_subset_beta,
            paste0(output_folder, "/clonotype_to_subset_beta.tsv"),
            sep = "\t", quote = FALSE, row.names = FALSE)