@sjcrh/proteinpaint-server 2.129.5 → 2.129.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/routes/gdc.grin2.list.js +125 -53
- package/routes/gdc.grin2.run.js +82 -11
- package/src/app.js +254 -110
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.129.5",
|
|
3
|
+
"version": "2.129.6",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -62,10 +62,10 @@
|
|
|
62
62
|
"dependencies": {
|
|
63
63
|
"@sjcrh/augen": "2.121.0",
|
|
64
64
|
"@sjcrh/proteinpaint-python": "2.118.0",
|
|
65
|
-
"@sjcrh/proteinpaint-r": "2.129.
|
|
66
|
-
"@sjcrh/proteinpaint-rust": "2.129.
|
|
67
|
-
"@sjcrh/proteinpaint-shared": "2.129.
|
|
68
|
-
"@sjcrh/proteinpaint-types": "2.129.
|
|
65
|
+
"@sjcrh/proteinpaint-r": "2.129.6",
|
|
66
|
+
"@sjcrh/proteinpaint-rust": "2.129.6",
|
|
67
|
+
"@sjcrh/proteinpaint-shared": "2.129.6",
|
|
68
|
+
"@sjcrh/proteinpaint-types": "2.129.6",
|
|
69
69
|
"@types/express": "^5.0.0",
|
|
70
70
|
"@types/express-session": "^1.18.1",
|
|
71
71
|
"better-sqlite3": "^9.4.1",
|
package/routes/gdc.grin2.list.js
CHANGED
|
@@ -2,7 +2,8 @@ import { gdcGRIN2listPayload } from "#types/checkers";
|
|
|
2
2
|
import ky from "ky";
|
|
3
3
|
import { joinUrl } from "#shared/joinUrl.js";
|
|
4
4
|
import serverconfig from "#src/serverconfig.js";
|
|
5
|
-
|
|
5
|
+
import { mayLog } from "#src/helpers.ts";
|
|
6
|
+
const mafMaxFileNumber = 1e3, cnvMaxFileNumber = 1e3;
|
|
6
7
|
const allowedWorkflowType = "Aliquot Ensemble Somatic Variant Merging and Masking";
|
|
7
8
|
const maxFileSizeAllowed = 1e6;
|
|
8
9
|
const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 4e8;
|
|
@@ -25,48 +26,45 @@ function init({ genomes }) {
|
|
|
25
26
|
const g = genomes.hg38;
|
|
26
27
|
if (!g)
|
|
27
28
|
throw "hg38 missing";
|
|
28
|
-
const ds = g.datasets.GDC;
|
|
29
|
+
const ds = g.datasets?.GDC;
|
|
29
30
|
if (!ds)
|
|
30
31
|
throw "hg38 GDC missing";
|
|
31
|
-
const
|
|
32
|
-
|
|
32
|
+
const result = {
|
|
33
|
+
files: [],
|
|
34
|
+
filesTotal: 0,
|
|
35
|
+
maxTotalSizeCompressed: 0,
|
|
36
|
+
fileCounts: { maf: 0 }
|
|
37
|
+
};
|
|
38
|
+
await mayListMafFiles(req.query, result, ds);
|
|
39
|
+
await mayListCnvFiles(req.query, result, ds);
|
|
40
|
+
res.send(result);
|
|
33
41
|
} catch (e) {
|
|
42
|
+
if (e.stack)
|
|
43
|
+
console.log(e.stack);
|
|
34
44
|
res.send({ status: "error", error: e.message || e });
|
|
35
45
|
}
|
|
36
46
|
};
|
|
37
47
|
}
|
|
38
|
-
async function listMafFiles(q, ds) {
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
throw "Missing experimentalStrategy parameter for MAF file retrieval";
|
|
43
|
-
}
|
|
44
|
-
const dataFormatFilter = {
|
|
48
|
+
async function mayListMafFiles(q, result, ds) {
|
|
49
|
+
if (!q.mafOptions)
|
|
50
|
+
return;
|
|
51
|
+
const filters = {
|
|
45
52
|
op: "and",
|
|
46
|
-
content: [
|
|
53
|
+
content: [
|
|
54
|
+
{ op: "=", content: { field: "data_format", value: "MAF" } },
|
|
55
|
+
{ op: "=", content: { field: "experimental_strategy", value: q.mafOptions.experimentalStrategy } },
|
|
56
|
+
{ op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
|
|
57
|
+
{ op: "=", content: { field: "access", value: "open" } }
|
|
58
|
+
]
|
|
47
59
|
};
|
|
48
|
-
let filters;
|
|
49
|
-
if (shouldRetrieveMaf) {
|
|
50
|
-
filters = {
|
|
51
|
-
op: "and",
|
|
52
|
-
content: [
|
|
53
|
-
dataFormatFilter,
|
|
54
|
-
{ op: "=", content: { field: "experimental_strategy", value: experimentalStrategy } },
|
|
55
|
-
{ op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
|
|
56
|
-
{ op: "=", content: { field: "access", value: "open" } }
|
|
57
|
-
]
|
|
58
|
-
};
|
|
59
|
-
} else {
|
|
60
|
-
throw "At least one file type option must be specified (mafOptions, cnvOptions, or fusionOptions)";
|
|
61
|
-
}
|
|
62
60
|
const case_filters = { op: "and", content: [] };
|
|
63
61
|
if (q.filter0) {
|
|
64
62
|
case_filters.content.push(q.filter0);
|
|
65
63
|
}
|
|
66
|
-
const { host } = ds.getHostHeaders(q);
|
|
64
|
+
const { host, headers } = ds.getHostHeaders(q);
|
|
67
65
|
const body = {
|
|
68
66
|
filters,
|
|
69
|
-
size:
|
|
67
|
+
size: mafMaxFileNumber,
|
|
70
68
|
fields: [
|
|
71
69
|
"id",
|
|
72
70
|
"file_size",
|
|
@@ -79,7 +77,7 @@ async function listMafFiles(q, ds) {
|
|
|
79
77
|
};
|
|
80
78
|
if (case_filters.content.length)
|
|
81
79
|
body.case_filters = case_filters;
|
|
82
|
-
const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, json: body });
|
|
80
|
+
const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, headers, json: body });
|
|
83
81
|
if (!response.ok)
|
|
84
82
|
throw `HTTP Error: ${response.status} ${response.statusText}`;
|
|
85
83
|
const re = await response.json();
|
|
@@ -99,7 +97,7 @@ async function listMafFiles(q, ds) {
|
|
|
99
97
|
fileSize: h.file_size,
|
|
100
98
|
reason: `File size (${h.file_size} bytes) exceeds maximum allowed size (${maxFileSizeAllowed} bytes)`
|
|
101
99
|
});
|
|
102
|
-
|
|
100
|
+
mayLog(
|
|
103
101
|
`File ${h.id} with a size of ${h.file_size} bytes is larger then the allowed file size. It is excluded from the list.
|
|
104
102
|
If you want to include it, please increase the maxFileSizeAllowed in the code.`
|
|
105
103
|
);
|
|
@@ -149,39 +147,113 @@ If you want to include it, please increase the maxFileSizeAllowed in the code.`
|
|
|
149
147
|
fileCount: caseFiles.length,
|
|
150
148
|
keptFileSize: caseFiles[0].file_size
|
|
151
149
|
});
|
|
152
|
-
|
|
153
|
-
`Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`
|
|
154
|
-
);
|
|
150
|
+
mayLog(`Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`);
|
|
155
151
|
} else {
|
|
156
152
|
deduplicatedFiles.push(caseFiles[0]);
|
|
157
153
|
}
|
|
158
154
|
}
|
|
159
155
|
if (duplicatesRemoved > 0) {
|
|
160
|
-
|
|
156
|
+
mayLog(
|
|
161
157
|
`GRIN2 MAF deduplication: Removed ${duplicatesRemoved} duplicate files, kept ${deduplicatedFiles.length} unique cases`
|
|
162
158
|
);
|
|
163
159
|
}
|
|
164
160
|
deduplicatedFiles.sort((a, b) => b.file_size - a.file_size);
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
deduplicationStats: {
|
|
177
|
-
originalFileCount: files.length,
|
|
178
|
-
deduplicatedFileCount: deduplicatedFiles.length,
|
|
179
|
-
duplicatesRemoved,
|
|
180
|
-
caseDetails,
|
|
181
|
-
filteredFiles
|
|
182
|
-
}
|
|
161
|
+
result.files.push(...deduplicatedFiles);
|
|
162
|
+
result.filesTotal = re.data.pagination.total;
|
|
163
|
+
if (result.fileCounts) {
|
|
164
|
+
result.fileCounts.maf = files.length;
|
|
165
|
+
}
|
|
166
|
+
result.deduplicationStats = {
|
|
167
|
+
originalFileCount: files.length,
|
|
168
|
+
deduplicatedFileCount: deduplicatedFiles.length,
|
|
169
|
+
duplicatesRemoved,
|
|
170
|
+
caseDetails,
|
|
171
|
+
filteredFiles
|
|
183
172
|
};
|
|
184
|
-
|
|
173
|
+
}
|
|
174
|
+
async function mayListCnvFiles(q, result, ds) {
|
|
175
|
+
result.cnvFiles = { files: [] };
|
|
176
|
+
if (!q.cnvOptions) {
|
|
177
|
+
console.log("No cnvOptions provided, returning empty cnvFiles");
|
|
178
|
+
return;
|
|
179
|
+
}
|
|
180
|
+
const fields = [
|
|
181
|
+
"cases.samples.tissue_type",
|
|
182
|
+
"cases.project.project_id",
|
|
183
|
+
"cases.submitter_id",
|
|
184
|
+
"cases.case_id",
|
|
185
|
+
"data_type",
|
|
186
|
+
"file_id",
|
|
187
|
+
"file_size",
|
|
188
|
+
"data_format",
|
|
189
|
+
"experimental_strategy",
|
|
190
|
+
"analysis.workflow_type"
|
|
191
|
+
];
|
|
192
|
+
const { host, headers } = ds.getHostHeaders(q);
|
|
193
|
+
try {
|
|
194
|
+
const re = await ky.post(joinUrl(host.rest, "files"), {
|
|
195
|
+
timeout: false,
|
|
196
|
+
headers,
|
|
197
|
+
json: {
|
|
198
|
+
size: cnvMaxFileNumber,
|
|
199
|
+
fields: fields.join(","),
|
|
200
|
+
filters: {
|
|
201
|
+
op: "in",
|
|
202
|
+
content: {
|
|
203
|
+
field: "data_type",
|
|
204
|
+
value: ["Masked Copy Number Segment", "Allele-specific Copy Number Segment"]
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}).json();
|
|
209
|
+
console.log("API Response:", {
|
|
210
|
+
hits: re.data?.hits?.length || 0,
|
|
211
|
+
firstHit: re.data?.hits?.[0]
|
|
212
|
+
});
|
|
213
|
+
if (!Array.isArray(re.data.hits)) {
|
|
214
|
+
throw new Error("API response data.hits is not an array");
|
|
215
|
+
}
|
|
216
|
+
const cnvFiles = [];
|
|
217
|
+
for (const h of re.data.hits) {
|
|
218
|
+
console.log("Processing file:", {
|
|
219
|
+
file_id: h.file_id,
|
|
220
|
+
data_format: h.data_format,
|
|
221
|
+
data_type: h.data_type,
|
|
222
|
+
cases_length: h.cases?.length
|
|
223
|
+
});
|
|
224
|
+
if (h.data_format === "TXT") {
|
|
225
|
+
if (h.data_type === "Masked Copy Number Segment" || h.data_type === "Allele-specific Copy Number Segment") {
|
|
226
|
+
const c = h.cases?.[0];
|
|
227
|
+
if (!c) {
|
|
228
|
+
console.warn(`Skipping file ${h.file_id} - missing case data`);
|
|
229
|
+
continue;
|
|
230
|
+
}
|
|
231
|
+
console.log("Case structure:", {
|
|
232
|
+
case_id: c.case_id,
|
|
233
|
+
submitter_id: c.submitter_id,
|
|
234
|
+
project: c.project,
|
|
235
|
+
samples: c.samples?.length
|
|
236
|
+
});
|
|
237
|
+
const file = {
|
|
238
|
+
id: h.file_id || h.id,
|
|
239
|
+
// Handle both field names
|
|
240
|
+
project_id: c.project?.project_id || "unknown",
|
|
241
|
+
// Safe access with fallback
|
|
242
|
+
file_size: h.file_size,
|
|
243
|
+
case_submitter_id: c.submitter_id,
|
|
244
|
+
case_uuid: c.case_id,
|
|
245
|
+
sample_types: c.samples?.map((s) => s.tissue_type).filter(Boolean) || []
|
|
246
|
+
};
|
|
247
|
+
cnvFiles.push(file);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
result.cnvFiles = { files: cnvFiles };
|
|
252
|
+
console.log(`Successfully processed ${cnvFiles.length} CNV files`);
|
|
253
|
+
} catch (error) {
|
|
254
|
+
console.error("Error fetching CNV files:", error);
|
|
255
|
+
result.cnvFiles = { files: [] };
|
|
256
|
+
}
|
|
185
257
|
}
|
|
186
258
|
export {
|
|
187
259
|
api,
|
package/routes/gdc.grin2.run.js
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { runGRIN2Payload } from "#types/checkers";
|
|
2
|
-
import {
|
|
2
|
+
import { stream_rust } from "@sjcrh/proteinpaint-rust";
|
|
3
3
|
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
4
4
|
import serverconfig from "#src/serverconfig.js";
|
|
5
5
|
import path from "path";
|
|
6
|
+
import { formatElapsedTime } from "@sjcrh/proteinpaint-shared/time.js";
|
|
6
7
|
const api = {
|
|
7
8
|
endpoint: "gdc/runGRIN2",
|
|
8
9
|
methods: {
|
|
@@ -16,6 +17,43 @@ const api = {
|
|
|
16
17
|
}
|
|
17
18
|
}
|
|
18
19
|
};
|
|
20
|
+
function parseJsonlOutput(rustOutput) {
|
|
21
|
+
const lines = rustOutput.trim().split("\n");
|
|
22
|
+
const allSuccessfulData = [];
|
|
23
|
+
let finalSummary = null;
|
|
24
|
+
for (const line of lines) {
|
|
25
|
+
const trimmedLine = line.trim();
|
|
26
|
+
if (trimmedLine) {
|
|
27
|
+
try {
|
|
28
|
+
const data = JSON.parse(trimmedLine);
|
|
29
|
+
if (data.type === "data") {
|
|
30
|
+
allSuccessfulData.push(data.data);
|
|
31
|
+
} else if (data.type === "summary") {
|
|
32
|
+
finalSummary = data;
|
|
33
|
+
console.log(`[GRIN2] Download complete: ${data.successful_files}/${data.total_files} files successful`);
|
|
34
|
+
if (data.failed_files > 0) {
|
|
35
|
+
console.log(`[GRIN2] ${data.failed_files} files failed`);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
} catch (parseError) {
|
|
39
|
+
console.error("[GRIN2] JSONL parse error:", parseError);
|
|
40
|
+
console.error("[GRIN2] Problematic line:", trimmedLine);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
if (!finalSummary) {
|
|
45
|
+
throw new Error("No summary found in Rust output");
|
|
46
|
+
}
|
|
47
|
+
return {
|
|
48
|
+
successful_data: allSuccessfulData,
|
|
49
|
+
failed_files: finalSummary.errors || [],
|
|
50
|
+
summary: {
|
|
51
|
+
total_files: finalSummary.total_files,
|
|
52
|
+
successful_files: finalSummary.successful_files,
|
|
53
|
+
failed_files: finalSummary.failed_files
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
}
|
|
19
57
|
function init({ genomes }) {
|
|
20
58
|
return async (req, res) => {
|
|
21
59
|
try {
|
|
@@ -29,22 +67,54 @@ function init({ genomes }) {
|
|
|
29
67
|
console.log(`[GRIN2] Request received:`, JSON.stringify(req.query));
|
|
30
68
|
const parsedRequest = req.query;
|
|
31
69
|
console.log(`[GRIN2] Parsed request: ${JSON.stringify(parsedRequest)}`);
|
|
32
|
-
console.log("[GRIN2] Calling Rust for file processing...");
|
|
33
70
|
const rustInput = JSON.stringify({
|
|
34
71
|
caseFiles: parsedRequest.caseFiles,
|
|
35
72
|
mafOptions: parsedRequest.mafOptions
|
|
36
73
|
});
|
|
37
|
-
console.log("[GRIN2] Executing Rust function...");
|
|
38
|
-
|
|
74
|
+
console.log("[GRIN2] Executing Rust function with streaming...");
|
|
75
|
+
let rustOutput = "";
|
|
76
|
+
let buffer = "";
|
|
77
|
+
const downloadStartTime = Date.now();
|
|
78
|
+
const streamResult = stream_rust("gdcGRIN2", rustInput, (errors) => {
|
|
79
|
+
if (errors) {
|
|
80
|
+
throw new Error(`Rust process failed: ${errors}`);
|
|
81
|
+
}
|
|
82
|
+
});
|
|
83
|
+
if (!streamResult) {
|
|
84
|
+
throw new Error("Failed to start Rust streaming process");
|
|
85
|
+
}
|
|
86
|
+
for await (const chunk of streamResult.rustStream) {
|
|
87
|
+
const chunkStr = chunk.toString();
|
|
88
|
+
rustOutput += chunkStr;
|
|
89
|
+
buffer += chunkStr;
|
|
90
|
+
const lines = buffer.split("\n");
|
|
91
|
+
buffer = lines.pop() || "";
|
|
92
|
+
for (const line of lines) {
|
|
93
|
+
const trimmedLine = line.trim();
|
|
94
|
+
if (trimmedLine) {
|
|
95
|
+
try {
|
|
96
|
+
const data = JSON.parse(trimmedLine);
|
|
97
|
+
if (data.type === "summary") {
|
|
98
|
+
console.log(`[GRIN2] Download complete: ${data.successful_files}/${data.total_files} files successful`);
|
|
99
|
+
if (data.failed_files > 0) {
|
|
100
|
+
console.log(`[GRIN2] ${data.failed_files} files failed`);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
} catch (_parseError) {
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
39
108
|
console.log("[GRIN2] Rust execution completed");
|
|
109
|
+
const downloadTime = formatElapsedTime(Date.now() - downloadStartTime);
|
|
110
|
+
console.log(`[GRIN2] Rust processing took ${downloadTime}`);
|
|
111
|
+
const rustResult = parseJsonlOutput(rustOutput);
|
|
40
112
|
if (!rustResult) {
|
|
41
113
|
throw new Error("Failed to process MAF files: No result from Rust");
|
|
42
114
|
}
|
|
43
|
-
|
|
115
|
+
const parsedRustResult = rustResult;
|
|
44
116
|
let dataForR = [];
|
|
45
117
|
try {
|
|
46
|
-
parsedRustResult = typeof rustResult === "string" ? JSON.parse(rustResult) : rustResult;
|
|
47
|
-
console.log(`[GRIN2] Parsed Rust result structure received`);
|
|
48
118
|
if (parsedRustResult.successful_data && Array.isArray(parsedRustResult.successful_data)) {
|
|
49
119
|
dataForR = parsedRustResult.successful_data.flat();
|
|
50
120
|
console.log(`[GRIN2] Extracted ${dataForR.length} records for R script`);
|
|
@@ -56,7 +126,7 @@ function init({ genomes }) {
|
|
|
56
126
|
dataForR = [];
|
|
57
127
|
}
|
|
58
128
|
} catch (parseError) {
|
|
59
|
-
console.error("[GRIN2] Error
|
|
129
|
+
console.error("[GRIN2] Error processing Rust result:", parseError);
|
|
60
130
|
dataForR = [];
|
|
61
131
|
}
|
|
62
132
|
const genedbfile = path.join(serverconfig.tpmasterdir, g.genedb.dbfile);
|
|
@@ -68,20 +138,21 @@ function init({ genomes }) {
|
|
|
68
138
|
lesion: dataForR
|
|
69
139
|
// The mutation string from Rust
|
|
70
140
|
});
|
|
71
|
-
console.log(`R input: ${rInput}`);
|
|
72
141
|
console.log("[GRIN2] Executing R script...");
|
|
142
|
+
const rAnalysisTime = Date.now();
|
|
73
143
|
const rResult = await run_R("gdcGRIN2.R", rInput, []);
|
|
74
|
-
console.log(
|
|
144
|
+
console.log("[GRIN2] R execution completed");
|
|
145
|
+
console.log(`[GRIN2] R analysis took ${formatElapsedTime(Date.now() - rAnalysisTime)}`);
|
|
75
146
|
let resultData;
|
|
76
147
|
try {
|
|
77
148
|
resultData = JSON.parse(rResult);
|
|
78
149
|
console.log("[GRIN2] Finished R analysis");
|
|
79
150
|
const pngImg = resultData.png[0];
|
|
80
151
|
const topGeneTable = resultData.topGeneTable || null;
|
|
152
|
+
console.log("[GRIN2] Total GRIN2 processing time:", formatElapsedTime(Date.now() - downloadStartTime));
|
|
81
153
|
return res.json({ pngImg, topGeneTable, rustResult: parsedRustResult, status: "success" });
|
|
82
154
|
} catch (parseError) {
|
|
83
155
|
console.error("[GRIN2] Error parsing R result:", parseError);
|
|
84
|
-
console.log("[GRIN2] Raw R result:", rResult);
|
|
85
156
|
}
|
|
86
157
|
} catch (e) {
|
|
87
158
|
console.error("[GRIN2] Error running analysis:", e);
|