@sjcrh/proteinpaint-server 2.129.5 → 2.129.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.129.5",
3
+ "version": "2.129.6",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -62,10 +62,10 @@
62
62
  "dependencies": {
63
63
  "@sjcrh/augen": "2.121.0",
64
64
  "@sjcrh/proteinpaint-python": "2.118.0",
65
- "@sjcrh/proteinpaint-r": "2.129.2",
66
- "@sjcrh/proteinpaint-rust": "2.129.2",
67
- "@sjcrh/proteinpaint-shared": "2.129.5",
68
- "@sjcrh/proteinpaint-types": "2.129.2",
65
+ "@sjcrh/proteinpaint-r": "2.129.6",
66
+ "@sjcrh/proteinpaint-rust": "2.129.6",
67
+ "@sjcrh/proteinpaint-shared": "2.129.6",
68
+ "@sjcrh/proteinpaint-types": "2.129.6",
69
69
  "@types/express": "^5.0.0",
70
70
  "@types/express-session": "^1.18.1",
71
71
  "better-sqlite3": "^9.4.1",
@@ -2,7 +2,8 @@ import { gdcGRIN2listPayload } from "#types/checkers";
2
2
  import ky from "ky";
3
3
  import { joinUrl } from "#shared/joinUrl.js";
4
4
  import serverconfig from "#src/serverconfig.js";
5
- const maxFileNumber = 1e3;
5
+ import { mayLog } from "#src/helpers.ts";
6
+ const mafMaxFileNumber = 1e3, cnvMaxFileNumber = 1e3;
6
7
  const allowedWorkflowType = "Aliquot Ensemble Somatic Variant Merging and Masking";
7
8
  const maxFileSizeAllowed = 1e6;
8
9
  const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 4e8;
@@ -25,48 +26,45 @@ function init({ genomes }) {
25
26
  const g = genomes.hg38;
26
27
  if (!g)
27
28
  throw "hg38 missing";
28
- const ds = g.datasets.GDC;
29
+ const ds = g.datasets?.GDC;
29
30
  if (!ds)
30
31
  throw "hg38 GDC missing";
31
- const payload = await listMafFiles(req.query, ds);
32
- res.send(payload);
32
+ const result = {
33
+ files: [],
34
+ filesTotal: 0,
35
+ maxTotalSizeCompressed: 0,
36
+ fileCounts: { maf: 0 }
37
+ };
38
+ await mayListMafFiles(req.query, result, ds);
39
+ await mayListCnvFiles(req.query, result, ds);
40
+ res.send(result);
33
41
  } catch (e) {
42
+ if (e.stack)
43
+ console.log(e.stack);
34
44
  res.send({ status: "error", error: e.message || e });
35
45
  }
36
46
  };
37
47
  }
38
- async function listMafFiles(q, ds) {
39
- const shouldRetrieveMaf = !!q.mafOptions;
40
- const experimentalStrategy = q.mafOptions?.experimentalStrategy;
41
- if (shouldRetrieveMaf && !experimentalStrategy) {
42
- throw "Missing experimentalStrategy parameter for MAF file retrieval";
43
- }
44
- const dataFormatFilter = {
48
+ async function mayListMafFiles(q, result, ds) {
49
+ if (!q.mafOptions)
50
+ return;
51
+ const filters = {
45
52
  op: "and",
46
- content: [{ op: "=", content: { field: "data_format", value: "MAF" } }]
53
+ content: [
54
+ { op: "=", content: { field: "data_format", value: "MAF" } },
55
+ { op: "=", content: { field: "experimental_strategy", value: q.mafOptions.experimentalStrategy } },
56
+ { op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
57
+ { op: "=", content: { field: "access", value: "open" } }
58
+ ]
47
59
  };
48
- let filters;
49
- if (shouldRetrieveMaf) {
50
- filters = {
51
- op: "and",
52
- content: [
53
- dataFormatFilter,
54
- { op: "=", content: { field: "experimental_strategy", value: experimentalStrategy } },
55
- { op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
56
- { op: "=", content: { field: "access", value: "open" } }
57
- ]
58
- };
59
- } else {
60
- throw "At least one file type option must be specified (mafOptions, cnvOptions, or fusionOptions)";
61
- }
62
60
  const case_filters = { op: "and", content: [] };
63
61
  if (q.filter0) {
64
62
  case_filters.content.push(q.filter0);
65
63
  }
66
- const { host } = ds.getHostHeaders(q);
64
+ const { host, headers } = ds.getHostHeaders(q);
67
65
  const body = {
68
66
  filters,
69
- size: maxFileNumber,
67
+ size: mafMaxFileNumber,
70
68
  fields: [
71
69
  "id",
72
70
  "file_size",
@@ -79,7 +77,7 @@ async function listMafFiles(q, ds) {
79
77
  };
80
78
  if (case_filters.content.length)
81
79
  body.case_filters = case_filters;
82
- const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, json: body });
80
+ const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, headers, json: body });
83
81
  if (!response.ok)
84
82
  throw `HTTP Error: ${response.status} ${response.statusText}`;
85
83
  const re = await response.json();
@@ -99,7 +97,7 @@ async function listMafFiles(q, ds) {
99
97
  fileSize: h.file_size,
100
98
  reason: `File size (${h.file_size} bytes) exceeds maximum allowed size (${maxFileSizeAllowed} bytes)`
101
99
  });
102
- console.log(
100
+ mayLog(
103
101
  `File ${h.id} with a size of ${h.file_size} bytes is larger then the allowed file size. It is excluded from the list.
104
102
  If you want to include it, please increase the maxFileSizeAllowed in the code.`
105
103
  );
@@ -149,39 +147,113 @@ If you want to include it, please increase the maxFileSizeAllowed in the code.`
149
147
  fileCount: caseFiles.length,
150
148
  keptFileSize: caseFiles[0].file_size
151
149
  });
152
- console.log(
153
- `Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`
154
- );
150
+ mayLog(`Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`);
155
151
  } else {
156
152
  deduplicatedFiles.push(caseFiles[0]);
157
153
  }
158
154
  }
159
155
  if (duplicatesRemoved > 0) {
160
- console.log(
156
+ mayLog(
161
157
  `GRIN2 MAF deduplication: Removed ${duplicatesRemoved} duplicate files, kept ${deduplicatedFiles.length} unique cases`
162
158
  );
163
159
  }
164
160
  deduplicatedFiles.sort((a, b) => b.file_size - a.file_size);
165
- const result = {
166
- files: deduplicatedFiles,
167
- filesTotal: re.data.pagination.total,
168
- maxTotalSizeCompressed,
169
- fileCounts: {
170
- maf: files.length
171
- },
172
- appliedFilters: {
173
- fileTypes: shouldRetrieveMaf ? ["MAF"] : [],
174
- experimentalStrategy
175
- },
176
- deduplicationStats: {
177
- originalFileCount: files.length,
178
- deduplicatedFileCount: deduplicatedFiles.length,
179
- duplicatesRemoved,
180
- caseDetails,
181
- filteredFiles
182
- }
161
+ result.files.push(...deduplicatedFiles);
162
+ result.filesTotal = re.data.pagination.total;
163
+ if (result.fileCounts) {
164
+ result.fileCounts.maf = files.length;
165
+ }
166
+ result.deduplicationStats = {
167
+ originalFileCount: files.length,
168
+ deduplicatedFileCount: deduplicatedFiles.length,
169
+ duplicatesRemoved,
170
+ caseDetails,
171
+ filteredFiles
183
172
  };
184
- return result;
173
+ }
174
/**
 * Fill `result.cnvFiles` with copy-number segment files when CNV options are present.
 *
 * `result.cnvFiles` is always set to `{ files: [] }` first, so it exists even when
 * `q.cnvOptions` is absent or when the lookup fails. Any error raised inside the
 * request/processing section is logged and leaves the empty list in place; it is
 * not rethrown. An error from `ds.getHostHeaders(q)` is raised before that section
 * and therefore does propagate to the caller.
 *
 * @param {object} q - request query; only `cnvOptions` is inspected here, and the whole
 *   object is passed to `ds.getHostHeaders`.
 * @param {object} result - response object that is mutated in place (`cnvFiles` is assigned).
 * @param {object} ds - dataset object exposing `getHostHeaders(q)` returning `{ host, headers }`.
 * @returns {Promise<void>}
 */
async function mayListCnvFiles(q, result, ds) {
  result.cnvFiles = { files: [] };
  if (!q.cnvOptions) {
    console.log("No cnvOptions provided, returning empty cnvFiles");
    return;
  }

  // The same two data types are used for the query filter and for the per-hit check below.
  const cnvDataTypes = ["Masked Copy Number Segment", "Allele-specific Copy Number Segment"];
  const requestedFields = [
    "cases.samples.tissue_type",
    "cases.project.project_id",
    "cases.submitter_id",
    "cases.case_id",
    "data_type",
    "file_id",
    "file_size",
    "data_format",
    "experimental_strategy",
    "analysis.workflow_type"
  ];
  const { host, headers } = ds.getHostHeaders(q);

  try {
    const requestBody = {
      size: cnvMaxFileNumber,
      fields: requestedFields.join(","),
      filters: {
        op: "in",
        content: { field: "data_type", value: cnvDataTypes }
      }
    };
    const re = await ky.post(joinUrl(host.rest, "files"), { timeout: false, headers, json: requestBody }).json();

    console.log("API Response:", {
      hits: re.data?.hits?.length || 0,
      firstHit: re.data?.hits?.[0]
    });
    if (!Array.isArray(re.data.hits)) {
      throw new Error("API response data.hits is not an array");
    }

    const collected = [];
    for (const hit of re.data.hits) {
      console.log("Processing file:", {
        file_id: hit.file_id,
        data_format: hit.data_format,
        data_type: hit.data_type,
        cases_length: hit.cases?.length
      });

      // Only TXT files of the two copy-number segment data types are kept.
      const isCnvSegmentFile = hit.data_format === "TXT" && cnvDataTypes.includes(hit.data_type);
      if (!isCnvSegmentFile) {
        continue;
      }

      // Only the first case attached to the file is used.
      const firstCase = hit.cases?.[0];
      if (!firstCase) {
        console.warn(`Skipping file ${hit.file_id} - missing case data`);
        continue;
      }

      console.log("Case structure:", {
        case_id: firstCase.case_id,
        submitter_id: firstCase.submitter_id,
        project: firstCase.project,
        samples: firstCase.samples?.length
      });

      collected.push({
        // Either identifier field may be present on the hit.
        id: hit.file_id || hit.id,
        // Falls back to "unknown" when the project id is missing.
        project_id: firstCase.project?.project_id || "unknown",
        file_size: hit.file_size,
        case_submitter_id: firstCase.submitter_id,
        case_uuid: firstCase.case_id,
        sample_types: firstCase.samples?.map((sample) => sample.tissue_type).filter(Boolean) || []
      });
    }

    result.cnvFiles = { files: collected };
    console.log(`Successfully processed ${collected.length} CNV files`);
  } catch (error) {
    // Best effort: a failed CNV lookup is logged and reported as an empty list.
    console.error("Error fetching CNV files:", error);
    result.cnvFiles = { files: [] };
  }
}
186
258
  export {
187
259
  api,
@@ -1,8 +1,9 @@
1
1
  import { runGRIN2Payload } from "#types/checkers";
2
- import { run_rust } from "@sjcrh/proteinpaint-rust";
2
+ import { stream_rust } from "@sjcrh/proteinpaint-rust";
3
3
  import { run_R } from "@sjcrh/proteinpaint-r";
4
4
  import serverconfig from "#src/serverconfig.js";
5
5
  import path from "path";
6
+ import { formatElapsedTime } from "@sjcrh/proteinpaint-shared/time.js";
6
7
  const api = {
7
8
  endpoint: "gdc/runGRIN2",
8
9
  methods: {
@@ -16,6 +17,43 @@ const api = {
16
17
  }
17
18
  }
18
19
  };
20
/**
 * Convert newline-delimited JSON text into a single result object.
 *
 * Each non-blank line is parsed on its own. Objects with `type === "data"` contribute
 * their `data` value to `successful_data`; an object with `type === "summary"` becomes
 * the summary (the last one wins if several appear) and its counts are logged. Lines
 * that cannot be parsed or processed are logged and skipped rather than aborting.
 *
 * @param {string} rustOutput - full text output, one JSON document per line.
 * @returns {{successful_data: Array, failed_files: Array, summary: {total_files: *, successful_files: *, failed_files: *}}}
 *   `failed_files` is the summary's `errors` value, or `[]` when that is falsy.
 * @throws {Error} when no summary line was found.
 */
function parseJsonlOutput(rustOutput) {
  const successfulData = [];
  let summaryRecord = null;

  // Normalise once: trim every line and discard the blank ones.
  const jsonLines = rustOutput
    .trim()
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((text) => text.length > 0);

  for (const text of jsonLines) {
    try {
      const record = JSON.parse(text);
      switch (record.type) {
        case "data":
          successfulData.push(record.data);
          break;
        case "summary":
          summaryRecord = record;
          console.log(`[GRIN2] Download complete: ${record.successful_files}/${record.total_files} files successful`);
          if (record.failed_files > 0) {
            console.log(`[GRIN2] ${record.failed_files} files failed`);
          }
          break;
        default:
          // Other record types are ignored.
          break;
      }
    } catch (parseError) {
      console.error("[GRIN2] JSONL parse error:", parseError);
      console.error("[GRIN2] Problematic line:", text);
    }
  }

  if (!summaryRecord) {
    throw new Error("No summary found in Rust output");
  }

  const { total_files, successful_files, failed_files, errors } = summaryRecord;
  return {
    successful_data: successfulData,
    failed_files: errors || [],
    summary: { total_files, successful_files, failed_files }
  };
}
19
57
  function init({ genomes }) {
20
58
  return async (req, res) => {
21
59
  try {
@@ -29,22 +67,54 @@ function init({ genomes }) {
29
67
  console.log(`[GRIN2] Request received:`, JSON.stringify(req.query));
30
68
  const parsedRequest = req.query;
31
69
  console.log(`[GRIN2] Parsed request: ${JSON.stringify(parsedRequest)}`);
32
- console.log("[GRIN2] Calling Rust for file processing...");
33
70
  const rustInput = JSON.stringify({
34
71
  caseFiles: parsedRequest.caseFiles,
35
72
  mafOptions: parsedRequest.mafOptions
36
73
  });
37
- console.log("[GRIN2] Executing Rust function...");
38
- const rustResult = await run_rust("gdcGRIN2", rustInput);
74
+ console.log("[GRIN2] Executing Rust function with streaming...");
75
+ let rustOutput = "";
76
+ let buffer = "";
77
+ const downloadStartTime = Date.now();
78
+ const streamResult = stream_rust("gdcGRIN2", rustInput, (errors) => {
79
+ if (errors) {
80
+ throw new Error(`Rust process failed: ${errors}`);
81
+ }
82
+ });
83
+ if (!streamResult) {
84
+ throw new Error("Failed to start Rust streaming process");
85
+ }
86
+ for await (const chunk of streamResult.rustStream) {
87
+ const chunkStr = chunk.toString();
88
+ rustOutput += chunkStr;
89
+ buffer += chunkStr;
90
+ const lines = buffer.split("\n");
91
+ buffer = lines.pop() || "";
92
+ for (const line of lines) {
93
+ const trimmedLine = line.trim();
94
+ if (trimmedLine) {
95
+ try {
96
+ const data = JSON.parse(trimmedLine);
97
+ if (data.type === "summary") {
98
+ console.log(`[GRIN2] Download complete: ${data.successful_files}/${data.total_files} files successful`);
99
+ if (data.failed_files > 0) {
100
+ console.log(`[GRIN2] ${data.failed_files} files failed`);
101
+ }
102
+ }
103
+ } catch (_parseError) {
104
+ }
105
+ }
106
+ }
107
+ }
39
108
  console.log("[GRIN2] Rust execution completed");
109
+ const downloadTime = formatElapsedTime(Date.now() - downloadStartTime);
110
+ console.log(`[GRIN2] Rust processing took ${downloadTime}`);
111
+ const rustResult = parseJsonlOutput(rustOutput);
40
112
  if (!rustResult) {
41
113
  throw new Error("Failed to process MAF files: No result from Rust");
42
114
  }
43
- let parsedRustResult;
115
+ const parsedRustResult = rustResult;
44
116
  let dataForR = [];
45
117
  try {
46
- parsedRustResult = typeof rustResult === "string" ? JSON.parse(rustResult) : rustResult;
47
- console.log(`[GRIN2] Parsed Rust result structure received`);
48
118
  if (parsedRustResult.successful_data && Array.isArray(parsedRustResult.successful_data)) {
49
119
  dataForR = parsedRustResult.successful_data.flat();
50
120
  console.log(`[GRIN2] Extracted ${dataForR.length} records for R script`);
@@ -56,7 +126,7 @@ function init({ genomes }) {
56
126
  dataForR = [];
57
127
  }
58
128
  } catch (parseError) {
59
- console.error("[GRIN2] Error parsing Rust result:", parseError);
129
+ console.error("[GRIN2] Error processing Rust result:", parseError);
60
130
  dataForR = [];
61
131
  }
62
132
  const genedbfile = path.join(serverconfig.tpmasterdir, g.genedb.dbfile);
@@ -68,20 +138,21 @@ function init({ genomes }) {
68
138
  lesion: dataForR
69
139
  // The mutation string from Rust
70
140
  });
71
- console.log(`R input: ${rInput}`);
72
141
  console.log("[GRIN2] Executing R script...");
142
+ const rAnalysisTime = Date.now();
73
143
  const rResult = await run_R("gdcGRIN2.R", rInput, []);
74
- console.log(`[GRIN2] R execution completed, result: ${rResult}`);
144
+ console.log("[GRIN2] R execution completed");
145
+ console.log(`[GRIN2] R analysis took ${formatElapsedTime(Date.now() - rAnalysisTime)}`);
75
146
  let resultData;
76
147
  try {
77
148
  resultData = JSON.parse(rResult);
78
149
  console.log("[GRIN2] Finished R analysis");
79
150
  const pngImg = resultData.png[0];
80
151
  const topGeneTable = resultData.topGeneTable || null;
152
+ console.log("[GRIN2] Total GRIN2 processing time:", formatElapsedTime(Date.now() - downloadStartTime));
81
153
  return res.json({ pngImg, topGeneTable, rustResult: parsedRustResult, status: "success" });
82
154
  } catch (parseError) {
83
155
  console.error("[GRIN2] Error parsing R result:", parseError);
84
- console.log("[GRIN2] Raw R result:", rResult);
85
156
  }
86
157
  } catch (e) {
87
158
  console.error("[GRIN2] Error running analysis:", e);