@sjcrh/proteinpaint-server 2.173.0 → 2.174.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.173.0",
3
+ "version": "2.174.1",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -62,11 +62,11 @@
62
62
  },
63
63
  "dependencies": {
64
64
  "@sjcrh/augen": "2.143.0",
65
- "@sjcrh/proteinpaint-python": "2.172.0",
65
+ "@sjcrh/proteinpaint-python": "2.174.0",
66
66
  "@sjcrh/proteinpaint-r": "2.152.1-0",
67
- "@sjcrh/proteinpaint-rust": "2.171.0",
68
- "@sjcrh/proteinpaint-shared": "2.171.0-0",
69
- "@sjcrh/proteinpaint-types": "2.173.0",
67
+ "@sjcrh/proteinpaint-rust": "2.174.0",
68
+ "@sjcrh/proteinpaint-shared": "2.174.1",
69
+ "@sjcrh/proteinpaint-types": "2.174.1",
70
70
  "@types/express": "^5.0.0",
71
71
  "@types/express-session": "^1.18.1",
72
72
  "better-sqlite3": "^12.4.1",
package/routes/grin2.js CHANGED
@@ -4,11 +4,19 @@ import path from "path";
4
4
  import { run_python } from "@sjcrh/proteinpaint-python";
5
5
  import { run_rust } from "@sjcrh/proteinpaint-rust";
6
6
  import { mayLog } from "#src/helpers.ts";
7
+ import os from "os";
7
8
  import { get_samples } from "#src/termdb.sql.js";
8
9
  import { read_file, file_is_readable } from "#src/utils.js";
9
10
  import { dtsnvindel, dtcnv, dtfusionrna, dtsv, dt2lesion, optionToDt, formatElapsedTime } from "#shared";
10
11
  import crypto from "crypto";
11
- const MAX_LESIONS_PER_TYPE = serverconfig.features.grin2maxLesionPerType || 11e4;
12
+ import { promisify } from "node:util";
13
+ import { exec as execCallback } from "node:child_process";
14
+ const MAX_LESIONS = serverconfig.features.grin2maxLesions || 25e4;
15
+ const GRIN2_MEMORY_BUDGET_MB = 950;
16
+ const GRIN2_CONCURRENCY_LIMIT = 10;
17
+ const MEMORY_BASE_MB = 260;
18
+ const MEMORY_PER_1K_LESIONS = 2.4;
19
+ const MIN_LESIONS = 5e4;
12
20
  const api = {
13
21
  endpoint: "grin2",
14
22
  methods: {
@@ -31,7 +39,7 @@ function init({ genomes }) {
31
39
  const ds = g.datasets?.[request.dslabel];
32
40
  if (!ds) throw new Error("ds missing");
33
41
  if (!ds.queries?.singleSampleMutation) throw new Error("singleSampleMutation query missing from dataset");
34
- const result = await runGrin2(g, ds, request);
42
+ const result = await runGrin2WithLimit(g, ds, request);
35
43
  res.json(result);
36
44
  } catch (e) {
37
45
  console.error("[GRIN2] Error stack:", e.stack);
@@ -43,6 +51,66 @@ function init({ genomes }) {
43
51
  }
44
52
  };
45
53
  }
54
const exec = promisify(execCallback);

/**
 * Parse macOS `vm_stat` output into an available-memory figure in MB.
 *
 * Available memory is taken as (free pages + inactive pages) * page size.
 * The page size is read from the first line of the report; 16384 bytes is
 * used when the header does not state it.
 *
 * @param {string} output - raw stdout of `vm_stat`
 * @returns {number|null} available memory in MB, or null when neither the
 *   "Pages free" nor the "Pages inactive" counter is present
 */
function parseVmStatAvailableMB(output) {
  const headerLine = output.split("\n")[0] || "";
  const pageSizeMatch = headerLine.match(/page size of\s+(\d+)\s+bytes/i);
  const pageSize = pageSizeMatch ? Number.parseInt(pageSizeMatch[1], 10) : 16384;
  const freeMatch = output.match(/Pages free:\s+(\d+)/);
  const inactiveMatch = output.match(/Pages inactive:\s+(\d+)/);
  // A report with neither counter is unparseable; reporting it as 0 MB would
  // needlessly force the smallest lesion cap, so let the caller fall back.
  if (!freeMatch && !inactiveMatch) return null;
  const freePages = freeMatch ? Number.parseInt(freeMatch[1], 10) : 0;
  const inactivePages = inactiveMatch ? Number.parseInt(inactiveMatch[1], 10) : 0;
  return ((freePages + inactivePages) * pageSize) / (1024 * 1024);
}

/**
 * Parse `free -m` output and return the value of the "available" column of
 * the `Mem:` row.
 *
 * The column is located through the header row that precedes `Mem:` instead
 * of a fixed index, because the column layout of `free` is not the same in
 * every version (a header without an "available" column yields null). When no
 * header row precedes `Mem:`, the sixth value is used, matching the previous
 * fixed-index behaviour.
 *
 * @param {string} output - raw stdout of `free -m`
 * @returns {number|null} available memory in MB, or null when it cannot be
 *   determined from the report
 */
function parseFreeAvailableMB(output) {
  const lines = output.split("\n");
  const memIndex = lines.findIndex((line) => line.startsWith("Mem:"));
  if (memIndex === -1) return null;
  const values = lines[memIndex].trim().split(/\s+/).slice(1);
  const header = memIndex > 0 ? lines[memIndex - 1].trim().split(/\s+/).filter(Boolean) : [];
  const column = header.length > 0 ? header.indexOf("available") : 5;
  if (column === -1) return null;
  const availableMB = Number.parseInt(values[column], 10);
  return Number.isFinite(availableMB) ? availableMB : null;
}

/**
 * Estimate how much system memory is currently available, in MB.
 *
 * Runs `vm_stat` on macOS and `free -m` on every other platform. If the
 * command fails or its report cannot be parsed into a finite number, the
 * result falls back to `os.freemem()`, so the returned value is always a
 * finite number.
 *
 * @param {object} [options]
 * @param {(command: string) => Promise<{stdout: string|Buffer}>} [options.run]
 *   command runner; defaults to the promisified `child_process.exec`
 * @param {string} [options.platform] - platform identifier; defaults to
 *   `process.platform`
 * @returns {Promise<number>} available memory in MB
 */
async function getAvailableMemoryMB({ run = exec, platform = process.platform } = {}) {
  try {
    const isMac = platform === "darwin";
    const { stdout } = await run(isMac ? "vm_stat" : "free -m");
    const output = String(stdout);
    const parsedMB = isMac ? parseVmStatAvailableMB(output) : parseFreeAvailableMB(output);
    if (parsedMB !== null) return parsedMB;
    mayLog("[GRIN2] Memory report could not be parsed, using fallback");
  } catch (e) {
    mayLog(`[GRIN2] Memory check failed, using fallback: ${e}`);
  }
  return os.freemem() / (1024 * 1024);
}
83
/**
 * Decide how many lesions a single GRIN2 run may load, based on the memory
 * currently available on the host.
 *
 * - When available memory is at least twice GRIN2_MEMORY_BUDGET_MB, the
 *   configured MAX_LESIONS is used unchanged.
 * - Otherwise 40% of the available memory is taken as the budget, and the cap
 *   is floor((budget - MEMORY_BASE_MB) / MEMORY_PER_1K_LESIONS) * 1000, raised
 *   to at least MIN_LESIONS.
 * - When the memory reading is not a finite number, the run is treated as
 *   memory constrained and MIN_LESIONS is used.
 *
 * The result never exceeds MAX_LESIONS, even when MAX_LESIONS has been
 * configured below MIN_LESIONS.
 *
 * @returns {Promise<number>} maximum number of lesions for this run
 */
async function getMaxLesions() {
  const availableMemoryMB = await getAvailableMemoryMB();
  if (!Number.isFinite(availableMemoryMB)) {
    // `NaN < x` is always false, so without this guard an unreadable memory
    // figure would silently select the full MAX_LESIONS cap.
    mayLog("[GRIN2] Available system memory could not be determined; using minimum lesion cap");
    return Math.min(MAX_LESIONS, MIN_LESIONS);
  }
  mayLog(`[GRIN2] Available system memory: ${availableMemoryMB.toFixed(0)} MB`);
  if (availableMemoryMB >= GRIN2_MEMORY_BUDGET_MB * 2) {
    return MAX_LESIONS;
  }
  const reducedBudget = availableMemoryMB * 0.4;
  mayLog(`[GRIN2] Reducing lesion cap due to memory constraints. New budget: ${reducedBudget.toFixed(2)} MB`);
  const calculated = Math.floor((reducedBudget - MEMORY_BASE_MB) / MEMORY_PER_1K_LESIONS) * 1e3;
  mayLog(`[GRIN2] Calculated lesion cap based on memory: ${calculated.toLocaleString()}`);
  // Apply the floor first and the ceiling last so the configured maximum
  // always wins over MIN_LESIONS.
  return Math.min(MAX_LESIONS, Math.max(MIN_LESIONS, calculated));
}
95
/** Number of GRIN2 analyses currently running in this process. */
let activeGrin2Jobs = 0;

/**
 * Run a GRIN2 analysis unless GRIN2_CONCURRENCY_LIMIT analyses are already
 * in flight.
 *
 * When the limit is reached the call rejects immediately with an Error whose
 * `status` and `statusCode` are both 429. Otherwise the in-flight counter is
 * incremented for the duration of `runGrin2` and decremented again whether
 * the analysis resolves or rejects.
 *
 * @param g - passed through to `runGrin2` unchanged
 * @param ds - passed through to `runGrin2` unchanged
 * @param request - passed through to `runGrin2` unchanged
 * @returns whatever `runGrin2` resolves to
 */
async function runGrin2WithLimit(g, ds, request) {
  if (activeGrin2Jobs >= GRIN2_CONCURRENCY_LIMIT) {
    const message = `GRIN2 analysis queue is full (${GRIN2_CONCURRENCY_LIMIT} concurrent analyses). Please try again in a few minutes.`;
    throw Object.assign(new Error(message), { status: 429, statusCode: 429 });
  }

  activeGrin2Jobs += 1;
  mayLog(`[GRIN2] Starting analysis. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
  try {
    const analysisResult = await runGrin2(g, ds, request);
    return analysisResult;
  } finally {
    // Release the slot on success and on failure alike.
    activeGrin2Jobs -= 1;
    mayLog(`[GRIN2] Analysis complete. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
  }
}
46
114
  function generateCacheFileName() {
47
115
  const randomHex = crypto.randomBytes(16).toString("hex");
48
116
  const cacheFileName = `grin2_results_${randomHex}.txt`;
@@ -81,16 +149,16 @@ async function runGrin2(g, ds, request) {
81
149
  if (samples.length === 0) {
82
150
  throw new Error("No samples found matching the provided filter criteria");
83
151
  }
84
- const tracker = getLesionTracker(request);
85
152
  const processingStartTime = Date.now();
86
- const { lesions, processingSummary } = await processSampleData(samples, ds, request, tracker);
153
+ const { lesions, processing } = await processSampleData(samples, ds, request);
154
+ if (!processing) throw new Error("Processing summary is missing");
87
155
  const processingTime = Date.now() - processingStartTime;
88
156
  mayLog(`[GRIN2] Data processing took ${formatElapsedTime(processingTime)}`);
89
157
  mayLog(
90
- `[GRIN2] Processing summary: ${processingSummary?.processedSamples ?? 0}/${processingSummary?.totalSamples ?? samples.length} samples processed successfully`
158
+ `[GRIN2] Processing summary: ${processing?.processedSamples ?? 0}/${processing?.totalSamples ?? samples.length} samples processed successfully`
91
159
  );
92
- if (processingSummary?.failedSamples !== void 0 && processingSummary.failedSamples > 0) {
93
- mayLog(`[GRIN2] Warning: ${processingSummary.failedSamples} samples failed to process`);
160
+ if (processing?.failedSamples !== void 0 && processing.failedSamples > 0) {
161
+ mayLog(`[GRIN2] Warning: ${processing.failedSamples} samples failed to process`);
94
162
  }
95
163
  if (lesions.length === 0) {
96
164
  throw new Error("No lesions found after processing all samples. Check filter criteria and input data.");
@@ -101,7 +169,6 @@ async function runGrin2(g, ds, request) {
101
169
  chromosomelist: {},
102
170
  lesion: JSON.stringify(lesions),
103
171
  cacheFileName: generateCacheFileName(),
104
- availableDataTypes,
105
172
  maxGenesToShow: request.maxGenesToShow,
106
173
  lesionTypeMap: buildLesionTypeMap(availableDataTypes)
107
174
  };
@@ -142,47 +209,93 @@ async function runGrin2(g, ds, request) {
142
209
  throw new Error("Invalid Rust output: missing PNG data");
143
210
  }
144
211
  const totalTime = processingTime + grin2AnalysisTime + manhattanPlotTime;
212
+ const lesionTypeRows = [];
213
+ if (processing.lesionCounts?.byType) {
214
+ const typeLabels = {};
215
+ Object.values(dt2lesion).forEach((config) => {
216
+ config.lesionTypes.forEach((lt) => {
217
+ typeLabels[lt.lesionType] = lt.name;
218
+ });
219
+ });
220
+ for (const [type, data] of Object.entries(processing.lesionCounts.byType)) {
221
+ const { count, samples: samples2 } = data;
222
+ lesionTypeRows.push([typeLabels[type] || type, `${count.toLocaleString()} (${samples2} samples)`]);
223
+ }
224
+ }
225
+ const capWarningRows = [];
226
+ const expectedToProcess = processing.totalSamples - processing.failedSamples;
227
+ if (processing.processedSamples < expectedToProcess) {
228
+ capWarningRows.push([
229
+ "Note",
230
+ `Lesion cap of ${processing.lesionCap?.toLocaleString()} was reached before all samples could be processed. Analysis ran on ${processing.processedSamples.toLocaleString()} of ${expectedToProcess.toLocaleString()} samples.`
231
+ ]);
232
+ }
145
233
  const response = {
146
234
  status: "success",
147
235
  pngImg: manhattanPlotData.png,
148
236
  plotData: manhattanPlotData.plot_data,
149
237
  topGeneTable: resultData.topGeneTable,
150
- totalGenes: resultData.totalGenes,
151
- showingTop: resultData.showingTop,
152
- timing: {
153
- processingTime: formatElapsedTime(processingTime),
154
- grin2Time: formatElapsedTime(grin2AnalysisTime),
155
- plottingTime: formatElapsedTime(manhattanPlotTime),
156
- totalTime: formatElapsedTime(totalTime)
157
- },
158
- processingSummary,
159
- cacheFileName: resultData.cacheFileName
238
+ stats: {
239
+ lst: [
240
+ {
241
+ name: "GRIN2 Processing Summary",
242
+ rows: [
243
+ ["Total Genes", resultData.totalGenes.toLocaleString()],
244
+ ["Showing Top", resultData.showingTop.toLocaleString()],
245
+ ["Cache File Name", resultData.cacheFileName],
246
+ ["Total Samples", processing.totalSamples.toLocaleString()],
247
+ ["Processed Samples", processing.processedSamples.toLocaleString()],
248
+ ["Unprocessed Samples", (processing.unprocessedSamples ?? 0).toLocaleString()],
249
+ ["Failed Samples", processing.failedSamples.toLocaleString()],
250
+ ["Failed Files", (processing.failedFiles?.length ?? 0).toLocaleString()],
251
+ ["Total Lesions", processing.totalLesions.toLocaleString()],
252
+ ["Processed Lesions", processing.processedLesions.toLocaleString()]
253
+ ]
254
+ },
255
+ {
256
+ name: "Lesion Counts",
257
+ rows: lesionTypeRows
258
+ },
259
+ {
260
+ name: "Memory Usage",
261
+ rows: [
262
+ ["Start", `${resultData.memory?.start} MB`],
263
+ ["After prep", `${resultData.memory?.after_prep} MB`],
264
+ ["After overlaps", `${resultData.memory?.after_overlaps} MB`],
265
+ ["After counts", `${resultData.memory?.after_counts} MB`],
266
+ ["After stats", `${resultData.memory?.after_stats} MB`],
267
+ ["Peak", `${resultData.memory?.peak} MB`]
268
+ ]
269
+ },
270
+ {
271
+ name: "Timing",
272
+ rows: [
273
+ ["Processing", formatElapsedTime(processingTime)],
274
+ ["GRIN2", formatElapsedTime(grin2AnalysisTime)],
275
+ ["Plotting", formatElapsedTime(manhattanPlotTime)],
276
+ ["Total", formatElapsedTime(totalTime)],
277
+ ...capWarningRows
278
+ ]
279
+ }
280
+ ]
281
+ }
160
282
  };
161
283
  return response;
162
284
  }
163
- function getLesionTracker(req) {
164
- const currentTypes = [];
165
- if (req.snvindelOptions) currentTypes.push(dtsnvindel);
166
- if (req.cnvOptions) currentTypes.push(dtcnv);
167
- if (req.fusionOptions) currentTypes.push(dtfusionrna);
168
- if (req.svOptions) currentTypes.push(dtsv);
169
- const track = /* @__PURE__ */ new Map();
170
- for (const t of currentTypes) track.set(t, { count: 0 });
171
- return track;
172
- }
173
- function allTypesCapped(tracker) {
174
- for (const value of tracker.values()) {
175
- if (value.count < MAX_LESIONS_PER_TYPE) return false;
176
- }
177
- return true;
178
- }
179
- async function processSampleData(samples, ds, request, tracker) {
285
+ async function processSampleData(samples, ds, request) {
180
286
  const lesions = [];
287
+ const maxLesions = await getMaxLesions();
288
+ mayLog(`[GRIN2] Max lesions for this run: ${maxLesions.toLocaleString()}`);
181
289
  const samplesPerType = /* @__PURE__ */ new Map();
182
- for (const [type] of tracker.entries()) {
290
+ const enabledTypes = [];
291
+ if (request.snvindelOptions) enabledTypes.push(dtsnvindel);
292
+ if (request.cnvOptions) enabledTypes.push(dtcnv);
293
+ if (request.fusionOptions) enabledTypes.push(dtfusionrna);
294
+ if (request.svOptions) enabledTypes.push(dtsv);
295
+ for (const type of enabledTypes) {
183
296
  samplesPerType.set(type, /* @__PURE__ */ new Set());
184
297
  }
185
- const processingSummary = {
298
+ const processing = {
186
299
  totalSamples: samples.length,
187
300
  processedSamples: 0,
188
301
  failedSamples: 0,
@@ -191,36 +304,32 @@ async function processSampleData(samples, ds, request, tracker) {
191
304
  processedLesions: 0,
192
305
  unprocessedSamples: 0
193
306
  };
194
- outer: for (let i = 0; i < samples.length; i++) {
195
- if (allTypesCapped(tracker)) {
307
+ for (let i = 0; i < samples.length; i++) {
308
+ if (lesions.length >= maxLesions) {
196
309
  const remaining = samples.length - i;
197
- if (remaining > 0) processingSummary.unprocessedSamples += remaining;
198
- mayLog("[GRIN2] All enabled per-type caps reached; stopping early.");
199
- break outer;
310
+ if (remaining > 0) processing.unprocessedSamples += remaining;
311
+ mayLog(`[GRIN2] Overall lesion cap (${maxLesions}) reached; stopping early.`);
312
+ break;
200
313
  }
201
314
  const sample = samples[i];
202
315
  const filepath = path.join(serverconfig.tpmasterdir, ds.queries.singleSampleMutation.folder, sample.name);
203
316
  try {
204
317
  await file_is_readable(filepath);
205
318
  const mlst = JSON.parse(await read_file(filepath));
206
- const { sampleLesions, contributedTypes } = await processSampleMlst(sample.name, mlst, request, tracker);
319
+ const { sampleLesions, contributedTypes } = processSampleMlst(sample.name, mlst, request);
207
320
  const skipChrM = ds.queries.singleSampleMutation.discoPlot?.skipChrM;
208
321
  const filteredLesions = skipChrM ? sampleLesions.filter((lesion) => lesion[1].toLowerCase() !== "chrm") : sampleLesions;
209
- lesions.push(...filteredLesions);
322
+ const remainingCapacity = maxLesions - lesions.length;
323
+ const lesionsToAdd = filteredLesions.slice(0, remainingCapacity);
324
+ lesions.push(...lesionsToAdd);
210
325
  for (const type of contributedTypes) {
211
326
  samplesPerType.get(type)?.add(sample.name);
212
327
  }
213
- processingSummary.processedSamples += 1;
214
- processingSummary.totalLesions += filteredLesions.length;
215
- if (allTypesCapped(tracker)) {
216
- const remaining = samples.length - 1 - i;
217
- if (remaining > 0) processingSummary.unprocessedSamples += remaining;
218
- mayLog("[GRIN2] All enabled per-type caps reached; stopping early.");
219
- break outer;
220
- }
328
+ processing.processedSamples += 1;
329
+ processing.totalLesions += filteredLesions.length;
221
330
  } catch (error) {
222
- processingSummary.failedSamples += 1;
223
- processingSummary.failedFiles.push({
331
+ processing.failedSamples += 1;
332
+ processing.failedFiles.push({
224
333
  sampleName: sample.name,
225
334
  filePath: filepath,
226
335
  error: error instanceof Error ? error.message || "Unknown error" : String(error)
@@ -228,7 +337,8 @@ async function processSampleData(samples, ds, request, tracker) {
228
337
  mayLog(`[GRIN2] Error processing sample ${sample.name}`);
229
338
  }
230
339
  }
231
- processingSummary.processedLesions = lesions.length;
340
+ processing.processedLesions = lesions.length;
341
+ processing.lesionCap = maxLesions;
232
342
  const lesionCounts = {
233
343
  total: lesions.length,
234
344
  byType: {}
@@ -238,36 +348,29 @@ async function processSampleData(samples, ds, request, tracker) {
238
348
  const lesionType = lesion[4];
239
349
  lesionTypeCounts[lesionType] = (lesionTypeCounts[lesionType] || 0) + 1;
240
350
  }
241
- for (const [type, info] of tracker.entries()) {
242
- const isCapped = info.count >= MAX_LESIONS_PER_TYPE;
351
+ for (const type of enabledTypes) {
243
352
  const sampleCount = samplesPerType.get(type)?.size || 0;
244
353
  const dtConfig = dt2lesion[type];
245
354
  if (!dtConfig) continue;
246
355
  dtConfig.lesionTypes.forEach((lt) => {
247
356
  lesionCounts.byType[lt.lesionType] = {
248
357
  count: lesionTypeCounts[lt.lesionType] || 0,
249
- capped: isCapped,
250
358
  samples: sampleCount
251
359
  };
252
360
  });
253
361
  }
254
- processingSummary.lesionCounts = lesionCounts;
255
- return { lesions, processingSummary };
362
+ processing.lesionCounts = lesionCounts;
363
+ return { lesions, processing };
256
364
  }
257
- async function processSampleMlst(sampleName, mlst, request, tracker) {
365
+ function processSampleMlst(sampleName, mlst, request) {
258
366
  const sampleLesions = [];
259
367
  const contributedTypes = /* @__PURE__ */ new Set();
260
368
  for (const m of mlst) {
261
369
  switch (m.dt) {
262
370
  case dtsnvindel: {
263
371
  if (!request.snvindelOptions) break;
264
- const entry = tracker.get(dtsnvindel);
265
- if (entry && entry.count >= MAX_LESIONS_PER_TYPE) {
266
- break;
267
- }
268
372
  const les = filterAndConvertSnvIndel(sampleName, m, request.snvindelOptions);
269
- if (les && entry) {
270
- entry.count++;
373
+ if (les) {
271
374
  sampleLesions.push(les);
272
375
  contributedTypes.add(dtsnvindel);
273
376
  }
@@ -275,13 +378,8 @@ async function processSampleMlst(sampleName, mlst, request, tracker) {
275
378
  }
276
379
  case dtcnv: {
277
380
  if (!request.cnvOptions) break;
278
- const cnv = tracker.get(dtcnv);
279
- if (cnv && cnv.count >= MAX_LESIONS_PER_TYPE) {
280
- break;
281
- }
282
381
  const les = filterAndConvertCnv(sampleName, m, request.cnvOptions);
283
- if (les && cnv) {
284
- cnv.count++;
382
+ if (les) {
285
383
  sampleLesions.push(les);
286
384
  contributedTypes.add(dtcnv);
287
385
  }
@@ -289,24 +387,14 @@ async function processSampleMlst(sampleName, mlst, request, tracker) {
289
387
  }
290
388
  case dtfusionrna: {
291
389
  if (!request.fusionOptions) break;
292
- const fusion = tracker.get(dtfusionrna);
293
- if (fusion && fusion.count >= MAX_LESIONS_PER_TYPE) {
294
- break;
295
- }
296
390
  const les = filterAndConvertFusion(sampleName, m, request.fusionOptions);
297
- if (les && fusion) {
298
- const lesionsToAdd = Array.isArray(les[0]) ? les.length : 1;
299
- if (fusion.count + lesionsToAdd > MAX_LESIONS_PER_TYPE) {
300
- break;
301
- }
391
+ if (les) {
302
392
  if (Array.isArray(les[0])) {
303
393
  for (const lesion of les) {
304
394
  sampleLesions.push(lesion);
305
- fusion.count++;
306
395
  }
307
396
  } else {
308
397
  sampleLesions.push(les);
309
- fusion.count++;
310
398
  }
311
399
  contributedTypes.add(dtfusionrna);
312
400
  }
@@ -314,24 +402,14 @@ async function processSampleMlst(sampleName, mlst, request, tracker) {
314
402
  }
315
403
  case dtsv: {
316
404
  if (!request.svOptions) break;
317
- const sv = tracker.get(dtsv);
318
- if (sv && sv.count >= MAX_LESIONS_PER_TYPE) {
319
- break;
320
- }
321
405
  const les = filterAndConvertSV(sampleName, m, request.svOptions);
322
- if (les && sv) {
323
- const lesionsToAdd = Array.isArray(les[0]) ? les.length : 1;
324
- if (sv.count + lesionsToAdd > MAX_LESIONS_PER_TYPE) {
325
- break;
326
- }
406
+ if (les) {
327
407
  if (Array.isArray(les[0])) {
328
408
  for (const lesion of les) {
329
409
  sampleLesions.push(lesion);
330
- sv.count++;
331
410
  }
332
411
  } else {
333
412
  sampleLesions.push(les);
334
- sv.count++;
335
413
  }
336
414
  contributedTypes.add(dtsv);
337
415
  }