@sjcrh/proteinpaint-server 2.129.6-2b2fdc7ee.0 → 2.130.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.129.6-2b2fdc7ee.0",
3
+ "version": "2.130.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -62,10 +62,10 @@
62
62
  "dependencies": {
63
63
  "@sjcrh/augen": "2.121.0",
64
64
  "@sjcrh/proteinpaint-python": "2.118.0",
65
- "@sjcrh/proteinpaint-r": "2.129.2",
66
- "@sjcrh/proteinpaint-rust": "2.129.6-2b2fdc7ee.0",
67
- "@sjcrh/proteinpaint-shared": "2.129.6-2b2fdc7ee.0",
68
- "@sjcrh/proteinpaint-types": "2.129.6-2b2fdc7ee.0",
65
+ "@sjcrh/proteinpaint-r": "2.130.0",
66
+ "@sjcrh/proteinpaint-rust": "2.130.0",
67
+ "@sjcrh/proteinpaint-shared": "2.129.6",
68
+ "@sjcrh/proteinpaint-types": "2.130.0",
69
69
  "@types/express": "^5.0.0",
70
70
  "@types/express-session": "^1.18.1",
71
71
  "better-sqlite3": "^9.4.1",
@@ -2,7 +2,8 @@ import { gdcGRIN2listPayload } from "#types/checkers";
2
2
  import ky from "ky";
3
3
  import { joinUrl } from "#shared/joinUrl.js";
4
4
  import serverconfig from "#src/serverconfig.js";
5
- const maxFileNumber = 1e3;
5
+ import { mayLog } from "#src/helpers.ts";
6
+ const mafMaxFileNumber = 1e3, cnvMaxFileNumber = 1e3;
6
7
  const allowedWorkflowType = "Aliquot Ensemble Somatic Variant Merging and Masking";
7
8
  const maxFileSizeAllowed = 1e6;
8
9
  const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 4e8;
@@ -25,48 +26,45 @@ function init({ genomes }) {
25
26
  const g = genomes.hg38;
26
27
  if (!g)
27
28
  throw "hg38 missing";
28
- const ds = g.datasets.GDC;
29
+ const ds = g.datasets?.GDC;
29
30
  if (!ds)
30
31
  throw "hg38 GDC missing";
31
- const payload = await listMafFiles(req.query, ds);
32
- res.send(payload);
32
+ const result = {
33
+ files: [],
34
+ filesTotal: 0,
35
+ maxTotalSizeCompressed: 0,
36
+ fileCounts: { maf: 0 }
37
+ };
38
+ await mayListMafFiles(req.query, result, ds);
39
+ await mayListCnvFiles(req.query, result, ds);
40
+ res.send(result);
33
41
  } catch (e) {
42
+ if (e.stack)
43
+ console.log(e.stack);
34
44
  res.send({ status: "error", error: e.message || e });
35
45
  }
36
46
  };
37
47
  }
38
- async function listMafFiles(q, ds) {
39
- const shouldRetrieveMaf = !!q.mafOptions;
40
- const experimentalStrategy = q.mafOptions?.experimentalStrategy;
41
- if (shouldRetrieveMaf && !experimentalStrategy) {
42
- throw "Missing experimentalStrategy parameter for MAF file retrieval";
43
- }
44
- const dataFormatFilter = {
48
+ async function mayListMafFiles(q, result, ds) {
49
+ if (!q.mafOptions)
50
+ return;
51
+ const filters = {
45
52
  op: "and",
46
- content: [{ op: "=", content: { field: "data_format", value: "MAF" } }]
53
+ content: [
54
+ { op: "=", content: { field: "data_format", value: "MAF" } },
55
+ { op: "=", content: { field: "experimental_strategy", value: q.mafOptions.experimentalStrategy } },
56
+ { op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
57
+ { op: "=", content: { field: "access", value: "open" } }
58
+ ]
47
59
  };
48
- let filters;
49
- if (shouldRetrieveMaf) {
50
- filters = {
51
- op: "and",
52
- content: [
53
- dataFormatFilter,
54
- { op: "=", content: { field: "experimental_strategy", value: experimentalStrategy } },
55
- { op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
56
- { op: "=", content: { field: "access", value: "open" } }
57
- ]
58
- };
59
- } else {
60
- throw "At least one file type option must be specified (mafOptions, cnvOptions, or fusionOptions)";
61
- }
62
60
  const case_filters = { op: "and", content: [] };
63
61
  if (q.filter0) {
64
62
  case_filters.content.push(q.filter0);
65
63
  }
66
- const { host } = ds.getHostHeaders(q);
64
+ const { host, headers } = ds.getHostHeaders(q);
67
65
  const body = {
68
66
  filters,
69
- size: maxFileNumber,
67
+ size: mafMaxFileNumber,
70
68
  fields: [
71
69
  "id",
72
70
  "file_size",
@@ -79,7 +77,7 @@ async function listMafFiles(q, ds) {
79
77
  };
80
78
  if (case_filters.content.length)
81
79
  body.case_filters = case_filters;
82
- const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, json: body });
80
+ const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, headers, json: body });
83
81
  if (!response.ok)
84
82
  throw `HTTP Error: ${response.status} ${response.statusText}`;
85
83
  const re = await response.json();
@@ -99,7 +97,7 @@ async function listMafFiles(q, ds) {
99
97
  fileSize: h.file_size,
100
98
  reason: `File size (${h.file_size} bytes) exceeds maximum allowed size (${maxFileSizeAllowed} bytes)`
101
99
  });
102
- console.log(
100
+ mayLog(
103
101
  `File ${h.id} with a size of ${h.file_size} bytes is larger then the allowed file size. It is excluded from the list.
104
102
  If you want to include it, please increase the maxFileSizeAllowed in the code.`
105
103
  );
@@ -149,39 +147,105 @@ If you want to include it, please increase the maxFileSizeAllowed in the code.`
149
147
  fileCount: caseFiles.length,
150
148
  keptFileSize: caseFiles[0].file_size
151
149
  });
152
- console.log(
153
- `Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`
154
- );
150
+ mayLog(`Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`);
155
151
  } else {
156
152
  deduplicatedFiles.push(caseFiles[0]);
157
153
  }
158
154
  }
159
155
  if (duplicatesRemoved > 0) {
160
- console.log(
156
+ mayLog(
161
157
  `GRIN2 MAF deduplication: Removed ${duplicatesRemoved} duplicate files, kept ${deduplicatedFiles.length} unique cases`
162
158
  );
163
159
  }
164
160
  deduplicatedFiles.sort((a, b) => b.file_size - a.file_size);
165
- const result = {
166
- files: deduplicatedFiles,
167
- filesTotal: re.data.pagination.total,
168
- maxTotalSizeCompressed,
169
- fileCounts: {
170
- maf: files.length
171
- },
172
- appliedFilters: {
173
- fileTypes: shouldRetrieveMaf ? ["MAF"] : [],
174
- experimentalStrategy
175
- },
176
- deduplicationStats: {
177
- originalFileCount: files.length,
178
- deduplicatedFileCount: deduplicatedFiles.length,
179
- duplicatesRemoved,
180
- caseDetails,
181
- filteredFiles
161
+ result.files.push(...deduplicatedFiles);
162
+ result.filesTotal = re.data.pagination.total;
163
+ if (result.fileCounts) {
164
+ result.fileCounts.maf = files.length;
165
+ }
166
+ result.deduplicationStats = {
167
+ originalFileCount: files.length,
168
+ deduplicatedFileCount: deduplicatedFiles.length,
169
+ duplicatesRemoved,
170
+ caseDetails,
171
+ filteredFiles
172
+ };
173
+ }
174
+ async function mayListCnvFiles(q, result, ds) {
175
+ result.cnvFiles = { files: [] };
176
+ if (!q.cnvOptions) {
177
+ console.log("No cnvOptions provided, returning empty cnvFiles");
178
+ return;
179
+ }
180
+ const case_filters = { op: "and", content: [] };
181
+ if (q.filter0) {
182
+ case_filters.content.push(q.filter0);
183
+ }
184
+ const body = {
185
+ size: cnvMaxFileNumber,
186
+ fields: [
187
+ "cases.samples.tissue_type",
188
+ "cases.project.project_id",
189
+ "cases.submitter_id",
190
+ "cases.case_id",
191
+ "data_type",
192
+ "file_id",
193
+ "file_size",
194
+ "data_format",
195
+ "experimental_strategy",
196
+ "analysis.workflow_type"
197
+ ].join(","),
198
+ filters: {
199
+ op: "in",
200
+ content: {
201
+ field: "data_type",
202
+ value: ["Copy Number Segment", "Masked Copy Number Segment", "Allele-specific Copy Number Segment"]
203
+ }
182
204
  }
183
205
  };
184
- return result;
206
+ if (case_filters.content.length)
207
+ body.case_filters = case_filters;
208
+ const { host, headers } = ds.getHostHeaders(q);
209
+ try {
210
+ const re = await ky.post(joinUrl(host.rest, "files"), { timeout: false, headers, json: body }).json();
211
+ console.log("API Response:", {
212
+ hits: re.data?.hits?.length || 0,
213
+ firstHit: re.data?.hits?.[0]
214
+ });
215
+ if (!Array.isArray(re.data.hits)) {
216
+ throw new Error("API response data.hits is not an array");
217
+ }
218
+ const cnvFiles = [];
219
+ for (const h of re.data.hits) {
220
+ if (h.data_format != "TXT") {
221
+ continue;
222
+ }
223
+ if (!h.analysis?.workflow_type)
224
+ throw "h.analysis.workflow_type missing";
225
+ const c = h.cases?.[0];
226
+ if (!c)
227
+ throw "h.cases[0] missing";
228
+ if (h.data_type == "Allele-specific Copy Number Segment") {
229
+ } else if (h.data_type == "Masked Copy Number Segment" || h.data_type == "Copy Number Segment" && h.analysis.workflow_type != "DNACopy") {
230
+ const file = {
231
+ id: h.file_id || h.id,
232
+ // Handle both field names
233
+ project_id: c.project?.project_id || "unknown",
234
+ // Safe access with fallback
235
+ file_size: h.file_size,
236
+ case_submitter_id: c.submitter_id,
237
+ case_uuid: c.case_id,
238
+ sample_types: c.samples?.map((s) => s.tissue_type).filter(Boolean) || []
239
+ };
240
+ cnvFiles.push(file);
241
+ }
242
+ }
243
+ result.cnvFiles = { files: cnvFiles };
244
+ console.log(`Successfully processed ${cnvFiles.length} CNV files`);
245
+ } catch (error) {
246
+ console.error("Error fetching CNV files:", error);
247
+ result.cnvFiles = { files: [] };
248
+ }
185
249
  }
186
250
  export {
187
251
  api,
@@ -1,8 +1,9 @@
1
1
  import { runGRIN2Payload } from "#types/checkers";
2
- import { run_rust } from "@sjcrh/proteinpaint-rust";
2
+ import { stream_rust } from "@sjcrh/proteinpaint-rust";
3
3
  import { run_R } from "@sjcrh/proteinpaint-r";
4
4
  import serverconfig from "#src/serverconfig.js";
5
5
  import path from "path";
6
+ import { formatElapsedTime } from "@sjcrh/proteinpaint-shared/time.js";
6
7
  const api = {
7
8
  endpoint: "gdc/runGRIN2",
8
9
  methods: {
@@ -16,6 +17,48 @@ const api = {
16
17
  }
17
18
  }
18
19
  };
20
+ function parseJsonlOutput(rustOutput) {
21
+ const lines = rustOutput.trim().split("\n");
22
+ const allSuccessfulData = [];
23
+ let finalSummary = null;
24
+ for (const line of lines) {
25
+ const trimmedLine = line.trim();
26
+ if (trimmedLine) {
27
+ try {
28
+ const data = JSON.parse(trimmedLine);
29
+ if (data.type === "data") {
30
+ allSuccessfulData.push(data.data);
31
+ } else if (data.type === "summary") {
32
+ finalSummary = data;
33
+ console.log(`[GRIN2] Download complete: ${data.successful_files}/${data.total_files} files successful`);
34
+ if (data.failed_files > 0) {
35
+ console.log(`[GRIN2] ${data.failed_files} files failed`);
36
+ }
37
+ }
38
+ } catch (parseError) {
39
+ console.error("[GRIN2] JSONL parse error:", parseError);
40
+ console.error("[GRIN2] Problematic line:", trimmedLine);
41
+ }
42
+ }
43
+ }
44
+ if (!finalSummary) {
45
+ throw new Error("No summary found in Rust output");
46
+ }
47
+ return {
48
+ successful_data: allSuccessfulData,
49
+ failed_files: finalSummary.errors || [],
50
+ summary: {
51
+ total_files: finalSummary.total_files,
52
+ successful_files: finalSummary.successful_files,
53
+ failed_files: finalSummary.failed_files,
54
+ errors: finalSummary.errors || [],
55
+ filtered_records: finalSummary.filtered_records || 0,
56
+ filtered_maf_records: finalSummary.filtered_maf_records || 0,
57
+ filtered_cnv_records: finalSummary.filtered_cnv_records || 0,
58
+ filtered_records_by_case: finalSummary.filtered_records_by_case || {}
59
+ }
60
+ };
61
+ }
19
62
  function init({ genomes }) {
20
63
  return async (req, res) => {
21
64
  try {
@@ -29,22 +72,54 @@ function init({ genomes }) {
29
72
  console.log(`[GRIN2] Request received:`, JSON.stringify(req.query));
30
73
  const parsedRequest = req.query;
31
74
  console.log(`[GRIN2] Parsed request: ${JSON.stringify(parsedRequest)}`);
32
- console.log("[GRIN2] Calling Rust for file processing...");
33
75
  const rustInput = JSON.stringify({
34
76
  caseFiles: parsedRequest.caseFiles,
35
77
  mafOptions: parsedRequest.mafOptions
36
78
  });
37
- console.log("[GRIN2] Executing Rust function...");
38
- const rustResult = await run_rust("gdcGRIN2", rustInput);
79
+ console.log("[GRIN2] Executing Rust function with streaming...");
80
+ let rustOutput = "";
81
+ let buffer = "";
82
+ const downloadStartTime = Date.now();
83
+ const streamResult = stream_rust("gdcGRIN2", rustInput, (errors) => {
84
+ if (errors) {
85
+ throw new Error(`Rust process failed: ${errors}`);
86
+ }
87
+ });
88
+ if (!streamResult) {
89
+ throw new Error("Failed to start Rust streaming process");
90
+ }
91
+ for await (const chunk of streamResult.rustStream) {
92
+ const chunkStr = chunk.toString();
93
+ rustOutput += chunkStr;
94
+ buffer += chunkStr;
95
+ const lines = buffer.split("\n");
96
+ buffer = lines.pop() || "";
97
+ for (const line of lines) {
98
+ const trimmedLine = line.trim();
99
+ if (trimmedLine) {
100
+ try {
101
+ const data = JSON.parse(trimmedLine);
102
+ if (data.type === "summary") {
103
+ console.log(`[GRIN2] Download complete: ${data.successful_files}/${data.total_files} files successful`);
104
+ if (data.failed_files > 0) {
105
+ console.log(`[GRIN2] ${data.failed_files} files failed`);
106
+ }
107
+ }
108
+ } catch (_parseError) {
109
+ }
110
+ }
111
+ }
112
+ }
39
113
  console.log("[GRIN2] Rust execution completed");
114
+ const downloadTime = formatElapsedTime(Date.now() - downloadStartTime);
115
+ console.log(`[GRIN2] Rust processing took ${downloadTime}`);
116
+ const rustResult = parseJsonlOutput(rustOutput);
40
117
  if (!rustResult) {
41
118
  throw new Error("Failed to process MAF files: No result from Rust");
42
119
  }
43
- let parsedRustResult;
120
+ const parsedRustResult = rustResult;
44
121
  let dataForR = [];
45
122
  try {
46
- parsedRustResult = typeof rustResult === "string" ? JSON.parse(rustResult) : rustResult;
47
- console.log(`[GRIN2] Parsed Rust result structure received`);
48
123
  if (parsedRustResult.successful_data && Array.isArray(parsedRustResult.successful_data)) {
49
124
  dataForR = parsedRustResult.successful_data.flat();
50
125
  console.log(`[GRIN2] Extracted ${dataForR.length} records for R script`);
@@ -56,7 +131,7 @@ function init({ genomes }) {
56
131
  dataForR = [];
57
132
  }
58
133
  } catch (parseError) {
59
- console.error("[GRIN2] Error parsing Rust result:", parseError);
134
+ console.error("[GRIN2] Error processing Rust result:", parseError);
60
135
  dataForR = [];
61
136
  }
62
137
  const genedbfile = path.join(serverconfig.tpmasterdir, g.genedb.dbfile);
@@ -68,20 +143,28 @@ function init({ genomes }) {
68
143
  lesion: dataForR
69
144
  // The mutation string from Rust
70
145
  });
71
- console.log(`R input: ${rInput}`);
72
146
  console.log("[GRIN2] Executing R script...");
147
+ const rAnalysisTime = Date.now();
73
148
  const rResult = await run_R("gdcGRIN2.R", rInput, []);
74
- console.log(`[GRIN2] R execution completed, result: ${rResult}`);
149
+ console.log("[GRIN2] R execution completed");
150
+ console.log(`[GRIN2] R analysis took ${formatElapsedTime(Date.now() - rAnalysisTime)}`);
75
151
  let resultData;
76
152
  try {
77
153
  resultData = JSON.parse(rResult);
78
154
  console.log("[GRIN2] Finished R analysis");
79
155
  const pngImg = resultData.png[0];
80
156
  const topGeneTable = resultData.topGeneTable || null;
81
- return res.json({ pngImg, topGeneTable, rustResult: parsedRustResult, status: "success" });
157
+ const analysisStats = parsedRustResult.summary || {};
158
+ console.log("[GRIN2] Total GRIN2 processing time:", formatElapsedTime(Date.now() - downloadStartTime));
159
+ return res.json({
160
+ pngImg,
161
+ topGeneTable,
162
+ rustResult: parsedRustResult,
163
+ analysisStats,
164
+ status: "success"
165
+ });
82
166
  } catch (parseError) {
83
167
  console.error("[GRIN2] Error parsing R result:", parseError);
84
- console.log("[GRIN2] Raw R result:", rResult);
85
168
  }
86
169
  } catch (e) {
87
170
  console.error("[GRIN2] Error running analysis:", e);