npm - @sjcrh/proteinpaint-server - Versions diffs - 2.173.0 → 2.174.1 - Mend

@sjcrh/proteinpaint-server 2.173.0 → 2.174.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/package.json +5 -5
package/routes/grin2.js +173 -95
package/routes/termdb.chat.js +410 -52
package/routes/termdb.cluster.js +2 -1
package/routes/termdb.config.js +1 -0
package/routes/termdb.runChart.js +34 -28
package/src/app.js +860 -333
package/src/mds3.gdc.filter.js +58 -64

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@sjcrh/proteinpaint-server",
-  "version": "2.173.0",
+  "version": "2.174.1",
   "type": "module",
   "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
   "main": "src/app.js",
@@ -62,11 +62,11 @@
   },
   "dependencies": {
     "@sjcrh/augen": "2.143.0",
-    "@sjcrh/proteinpaint-python": "2.172.0",
+    "@sjcrh/proteinpaint-python": "2.174.0",
     "@sjcrh/proteinpaint-r": "2.152.1-0",
-    "@sjcrh/proteinpaint-rust": "2.171.0",
-    "@sjcrh/proteinpaint-shared": "2.171.0-0",
-    "@sjcrh/proteinpaint-types": "2.173.0",
+    "@sjcrh/proteinpaint-rust": "2.174.0",
+    "@sjcrh/proteinpaint-shared": "2.174.1",
+    "@sjcrh/proteinpaint-types": "2.174.1",
     "@types/express": "^5.0.0",
     "@types/express-session": "^1.18.1",
     "better-sqlite3": "^12.4.1",

package/routes/grin2.js CHANGED

@@ -4,11 +4,19 @@ import path from "path";
 import { run_python } from "@sjcrh/proteinpaint-python";
 import { run_rust } from "@sjcrh/proteinpaint-rust";
 import { mayLog } from "#src/helpers.ts";
+import os from "os";
 import { get_samples } from "#src/termdb.sql.js";
 import { read_file, file_is_readable } from "#src/utils.js";
 import { dtsnvindel, dtcnv, dtfusionrna, dtsv, dt2lesion, optionToDt, formatElapsedTime } from "#shared";
 import crypto from "crypto";
-const MAX_LESIONS_PER_TYPE = serverconfig.features.grin2maxLesionPerType || 11e4;
+import { promisify } from "node:util";
+import { exec as execCallback } from "node:child_process";
+const MAX_LESIONS = serverconfig.features.grin2maxLesions || 25e4;
+const GRIN2_MEMORY_BUDGET_MB = 950;
+const GRIN2_CONCURRENCY_LIMIT = 10;
+const MEMORY_BASE_MB = 260;
+const MEMORY_PER_1K_LESIONS = 2.4;
+const MIN_LESIONS = 5e4;
 const api = {
   endpoint: "grin2",
   methods: {
@@ -31,7 +39,7 @@ function init({ genomes }) {
       const ds = g.datasets?.[request.dslabel];
       if (!ds) throw new Error("ds missing");
       if (!ds.queries?.singleSampleMutation) throw new Error("singleSampleMutation query missing from dataset");
-      const result = await runGrin2(g, ds, request);
+      const result = await runGrin2WithLimit(g, ds, request);
       res.json(result);
     } catch (e) {
       console.error("[GRIN2] Error stack:", e.stack);
@@ -43,6 +51,66 @@ function init({ genomes }) {
     }
   };
 }
+const exec = promisify(execCallback);
+async function getAvailableMemoryMB() {
+  try {
+    if (process.platform === "darwin") {
+      const { stdout } = await exec("vm_stat");
+      const output = stdout.toString();
+      const headerLine = output.split("\n")[0] || "";
+      const pageSizeMatch = headerLine.match(/page size of\s+(\d+)\s+bytes/i);
+      const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1], 10) : 16384;
+      const freeMatch = output.match(/Pages free:\s+(\d+)/);
+      const inactiveMatch = output.match(/Pages inactive:\s+(\d+)/);
+      const freePages = freeMatch ? parseInt(freeMatch[1], 10) : 0;
+      const inactivePages = inactiveMatch ? parseInt(inactiveMatch[1], 10) : 0;
+      return (freePages + inactivePages) * pageSize / (1024 * 1024);
+    } else {
+      const { stdout } = await exec("free -m");
+      const output = stdout.toString();
+      const lines = output.split("\n");
+      const memLine = lines.find((l) => l.startsWith("Mem:"));
+      if (memLine) {
+        const parts = memLine.split(/\s+/);
+        return parseInt(parts[6]);
+      }
+    }
+  } catch (e) {
+    mayLog(`[GRIN2] Memory check failed, using fallback: ${e}`);
+  }
+  return os.freemem() / (1024 * 1024);
+}
+async function getMaxLesions() {
+  const availableMemoryMB = await getAvailableMemoryMB();
+  mayLog(`[GRIN2] Available system memory: ${availableMemoryMB.toFixed(0)} MB`);
+  if (availableMemoryMB < GRIN2_MEMORY_BUDGET_MB * 2) {
+    const reducedBudget = availableMemoryMB * 0.4;
+    mayLog(`[GRIN2] Reducing lesion cap due to memory constraints. New budget: ${reducedBudget.toFixed(2)} MB`);
+    const calculated = Math.floor((reducedBudget - MEMORY_BASE_MB) / MEMORY_PER_1K_LESIONS) * 1e3;
+    mayLog(`[GRIN2] Calculated lesion cap based on memory: ${calculated.toLocaleString()}`);
+    return Math.max(MIN_LESIONS, Math.min(MAX_LESIONS, calculated));
+  }
+  return MAX_LESIONS;
+}
+let activeGrin2Jobs = 0;
+async function runGrin2WithLimit(g, ds, request) {
+  if (activeGrin2Jobs >= GRIN2_CONCURRENCY_LIMIT) {
+    const error = new Error(
+      `GRIN2 analysis queue is full (${GRIN2_CONCURRENCY_LIMIT} concurrent analyses). Please try again in a few minutes.`
+    );
+    error.status = 429;
+    error.statusCode = 429;
+    throw error;
+  }
+  activeGrin2Jobs++;
+  mayLog(`[GRIN2] Starting analysis. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
+  try {
+    return await runGrin2(g, ds, request);
+  } finally {
+    activeGrin2Jobs--;
+    mayLog(`[GRIN2] Analysis complete. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
+  }
+}
 function generateCacheFileName() {
   const randomHex = crypto.randomBytes(16).toString("hex");
   const cacheFileName = `grin2_results_${randomHex}.txt`;
@@ -81,16 +149,16 @@ async function runGrin2(g, ds, request) {
   if (samples.length === 0) {
     throw new Error("No samples found matching the provided filter criteria");
   }
-  const tracker = getLesionTracker(request);
   const processingStartTime = Date.now();
-  const { lesions, processingSummary } = await processSampleData(samples, ds, request, tracker);
+  const { lesions, processing } = await processSampleData(samples, ds, request);
+  if (!processing) throw new Error("Processing summary is missing");
   const processingTime = Date.now() - processingStartTime;
   mayLog(`[GRIN2] Data processing took ${formatElapsedTime(processingTime)}`);
   mayLog(
-    `[GRIN2] Processing summary: ${processingSummary?.processedSamples ?? 0}/${processingSummary?.totalSamples ?? samples.length} samples processed successfully`
+    `[GRIN2] Processing summary: ${processing?.processedSamples ?? 0}/${processing?.totalSamples ?? samples.length} samples processed successfully`
   );
-  if (processingSummary?.failedSamples !== void 0 && processingSummary.failedSamples > 0) {
-    mayLog(`[GRIN2] Warning: ${processingSummary.failedSamples} samples failed to process`);
+  if (processing?.failedSamples !== void 0 && processing.failedSamples > 0) {
+    mayLog(`[GRIN2] Warning: ${processing.failedSamples} samples failed to process`);
   }
   if (lesions.length === 0) {
     throw new Error("No lesions found after processing all samples. Check filter criteria and input data.");
@@ -101,7 +169,6 @@ async function runGrin2(g, ds, request) {
     chromosomelist: {},
     lesion: JSON.stringify(lesions),
     cacheFileName: generateCacheFileName(),
-    availableDataTypes,
     maxGenesToShow: request.maxGenesToShow,
     lesionTypeMap: buildLesionTypeMap(availableDataTypes)
   };
@@ -142,47 +209,93 @@ async function runGrin2(g, ds, request) {
     throw new Error("Invalid Rust output: missing PNG data");
   }
   const totalTime = processingTime + grin2AnalysisTime + manhattanPlotTime;
+  const lesionTypeRows = [];
+  if (processing.lesionCounts?.byType) {
+    const typeLabels = {};
+    Object.values(dt2lesion).forEach((config) => {
+      config.lesionTypes.forEach((lt) => {
+        typeLabels[lt.lesionType] = lt.name;
+      });
+    });
+    for (const [type, data] of Object.entries(processing.lesionCounts.byType)) {
+      const { count, samples: samples2 } = data;
+      lesionTypeRows.push([typeLabels[type] || type, `${count.toLocaleString()} (${samples2} samples)`]);
+    }
+  }
+  const capWarningRows = [];
+  const expectedToProcess = processing.totalSamples - processing.failedSamples;
+  if (processing.processedSamples < expectedToProcess) {
+    capWarningRows.push([
+      "Note",
+      `Lesion cap of ${processing.lesionCap?.toLocaleString()} was reached before all samples could be processed. Analysis ran on ${processing.processedSamples.toLocaleString()} of ${expectedToProcess.toLocaleString()} samples.`
+    ]);
+  }
   const response = {
     status: "success",
     pngImg: manhattanPlotData.png,
     plotData: manhattanPlotData.plot_data,
     topGeneTable: resultData.topGeneTable,
-    totalGenes: resultData.totalGenes,
-    showingTop: resultData.showingTop,
-    timing: {
-      processingTime: formatElapsedTime(processingTime),
-      grin2Time: formatElapsedTime(grin2AnalysisTime),
-      plottingTime: formatElapsedTime(manhattanPlotTime),
-      totalTime: formatElapsedTime(totalTime)
-    },
-    processingSummary,
-    cacheFileName: resultData.cacheFileName
+    stats: {
+      lst: [
+        {
+          name: "GRIN2 Processing Summary",
+          rows: [
+            ["Total Genes", resultData.totalGenes.toLocaleString()],
+            ["Showing Top", resultData.showingTop.toLocaleString()],
+            ["Cache File Name", resultData.cacheFileName],
+            ["Total Samples", processing.totalSamples.toLocaleString()],
+            ["Processed Samples", processing.processedSamples.toLocaleString()],
+            ["Unprocessed Samples", (processing.unprocessedSamples ?? 0).toLocaleString()],
+            ["Failed Samples", processing.failedSamples.toLocaleString()],
+            ["Failed Files", (processing.failedFiles?.length ?? 0).toLocaleString()],
+            ["Total Lesions", processing.totalLesions.toLocaleString()],
+            ["Processed Lesions", processing.processedLesions.toLocaleString()]
+          ]
+        },
+        {
+          name: "Lesion Counts",
+          rows: lesionTypeRows
+        },
+        {
+          name: "Memory Usage",
+          rows: [
+            ["Start", `${resultData.memory?.start} MB`],
+            ["After prep", `${resultData.memory?.after_prep} MB`],
+            ["After overlaps", `${resultData.memory?.after_overlaps} MB`],
+            ["After counts", `${resultData.memory?.after_counts} MB`],
+            ["After stats", `${resultData.memory?.after_stats} MB`],
+            ["Peak", `${resultData.memory?.peak} MB`]
+          ]
+        },
+        {
+          name: "Timing",
+          rows: [
+            ["Processing", formatElapsedTime(processingTime)],
+            ["GRIN2", formatElapsedTime(grin2AnalysisTime)],
+            ["Plotting", formatElapsedTime(manhattanPlotTime)],
+            ["Total", formatElapsedTime(totalTime)],
+            ...capWarningRows
+          ]
+        }
+      ]
+    }
   };
   return response;
 }
-function getLesionTracker(req) {
-  const currentTypes = [];
-  if (req.snvindelOptions) currentTypes.push(dtsnvindel);
-  if (req.cnvOptions) currentTypes.push(dtcnv);
-  if (req.fusionOptions) currentTypes.push(dtfusionrna);
-  if (req.svOptions) currentTypes.push(dtsv);
-  const track = /* @__PURE__ */ new Map();
-  for (const t of currentTypes) track.set(t, { count: 0 });
-  return track;
-}
-function allTypesCapped(tracker) {
-  for (const value of tracker.values()) {
-    if (value.count < MAX_LESIONS_PER_TYPE) return false;
-  }
-  return true;
-}
-async function processSampleData(samples, ds, request, tracker) {
+async function processSampleData(samples, ds, request) {
   const lesions = [];
+  const maxLesions = await getMaxLesions();
+  mayLog(`[GRIN2] Max lesions for this run: ${maxLesions.toLocaleString()}`);
   const samplesPerType = /* @__PURE__ */ new Map();
-  for (const [type] of tracker.entries()) {
+  const enabledTypes = [];
+  if (request.snvindelOptions) enabledTypes.push(dtsnvindel);
+  if (request.cnvOptions) enabledTypes.push(dtcnv);
+  if (request.fusionOptions) enabledTypes.push(dtfusionrna);
+  if (request.svOptions) enabledTypes.push(dtsv);
+  for (const type of enabledTypes) {
     samplesPerType.set(type, /* @__PURE__ */ new Set());
   }
-  const processingSummary = {
+  const processing = {
     totalSamples: samples.length,
     processedSamples: 0,
     failedSamples: 0,
@@ -191,36 +304,32 @@ async function processSampleData(samples, ds, request, tracker) {
     processedLesions: 0,
     unprocessedSamples: 0
   };
-  outer: for (let i = 0; i < samples.length; i++) {
-    if (allTypesCapped(tracker)) {
+  for (let i = 0; i < samples.length; i++) {
+    if (lesions.length >= maxLesions) {
       const remaining = samples.length - i;
-      if (remaining > 0) processingSummary.unprocessedSamples += remaining;
-      mayLog("[GRIN2] All enabled per-type caps reached; stopping early.");
-      break outer;
+      if (remaining > 0) processing.unprocessedSamples += remaining;
+      mayLog(`[GRIN2] Overall lesion cap (${maxLesions}) reached; stopping early.`);
+      break;
     }
     const sample = samples[i];
     const filepath = path.join(serverconfig.tpmasterdir, ds.queries.singleSampleMutation.folder, sample.name);
     try {
       await file_is_readable(filepath);
       const mlst = JSON.parse(await read_file(filepath));
-      const { sampleLesions, contributedTypes } = await processSampleMlst(sample.name, mlst, request, tracker);
+      const { sampleLesions, contributedTypes } = processSampleMlst(sample.name, mlst, request);
       const skipChrM = ds.queries.singleSampleMutation.discoPlot?.skipChrM;
       const filteredLesions = skipChrM ? sampleLesions.filter((lesion) => lesion[1].toLowerCase() !== "chrm") : sampleLesions;
-      lesions.push(...filteredLesions);
+      const remainingCapacity = maxLesions - lesions.length;
+      const lesionsToAdd = filteredLesions.slice(0, remainingCapacity);
+      lesions.push(...lesionsToAdd);
       for (const type of contributedTypes) {
         samplesPerType.get(type)?.add(sample.name);
       }
-      processingSummary.processedSamples += 1;
-      processingSummary.totalLesions += filteredLesions.length;
-      if (allTypesCapped(tracker)) {
-        const remaining = samples.length - 1 - i;
-        if (remaining > 0) processingSummary.unprocessedSamples += remaining;
-        mayLog("[GRIN2] All enabled per-type caps reached; stopping early.");
-        break outer;
-      }
+      processing.processedSamples += 1;
+      processing.totalLesions += filteredLesions.length;
     } catch (error) {
-      processingSummary.failedSamples += 1;
-      processingSummary.failedFiles.push({
+      processing.failedSamples += 1;
+      processing.failedFiles.push({
         sampleName: sample.name,
         filePath: filepath,
         error: error instanceof Error ? error.message || "Unknown error" : String(error)
@@ -228,7 +337,8 @@ async function processSampleData(samples, ds, request, tracker) {
       mayLog(`[GRIN2] Error processing sample ${sample.name}`);
     }
   }
-  processingSummary.processedLesions = lesions.length;
+  processing.processedLesions = lesions.length;
+  processing.lesionCap = maxLesions;
   const lesionCounts = {
     total: lesions.length,
     byType: {}
@@ -238,36 +348,29 @@ async function processSampleData(samples, ds, request, tracker) {
     const lesionType = lesion[4];
     lesionTypeCounts[lesionType] = (lesionTypeCounts[lesionType] || 0) + 1;
   }
-  for (const [type, info] of tracker.entries()) {
-    const isCapped = info.count >= MAX_LESIONS_PER_TYPE;
+  for (const type of enabledTypes) {
     const sampleCount = samplesPerType.get(type)?.size || 0;
     const dtConfig = dt2lesion[type];
     if (!dtConfig) continue;
     dtConfig.lesionTypes.forEach((lt) => {
       lesionCounts.byType[lt.lesionType] = {
         count: lesionTypeCounts[lt.lesionType] || 0,
-        capped: isCapped,
         samples: sampleCount
       };
     });
   }
-  processingSummary.lesionCounts = lesionCounts;
-  return { lesions, processingSummary };
+  processing.lesionCounts = lesionCounts;
+  return { lesions, processing };
 }
-async function processSampleMlst(sampleName, mlst, request, tracker) {
+function processSampleMlst(sampleName, mlst, request) {
   const sampleLesions = [];
   const contributedTypes = /* @__PURE__ */ new Set();
   for (const m of mlst) {
     switch (m.dt) {
       case dtsnvindel: {
         if (!request.snvindelOptions) break;
-        const entry = tracker.get(dtsnvindel);
-        if (entry && entry.count >= MAX_LESIONS_PER_TYPE) {
-          break;
-        }
         const les = filterAndConvertSnvIndel(sampleName, m, request.snvindelOptions);
-        if (les && entry) {
-          entry.count++;
+        if (les) {
           sampleLesions.push(les);
           contributedTypes.add(dtsnvindel);
         }
@@ -275,13 +378,8 @@ async function processSampleMlst(sampleName, mlst, request, tracker) {
       }
       case dtcnv: {
         if (!request.cnvOptions) break;
-        const cnv = tracker.get(dtcnv);
-        if (cnv && cnv.count >= MAX_LESIONS_PER_TYPE) {
-          break;
-        }
         const les = filterAndConvertCnv(sampleName, m, request.cnvOptions);
-        if (les && cnv) {
-          cnv.count++;
+        if (les) {
           sampleLesions.push(les);
           contributedTypes.add(dtcnv);
         }
@@ -289,24 +387,14 @@ async function processSampleMlst(sampleName, mlst, request, tracker) {
       }
       case dtfusionrna: {
         if (!request.fusionOptions) break;
-        const fusion = tracker.get(dtfusionrna);
-        if (fusion && fusion.count >= MAX_LESIONS_PER_TYPE) {
-          break;
-        }
         const les = filterAndConvertFusion(sampleName, m, request.fusionOptions);
-        if (les && fusion) {
-          const lesionsToAdd = Array.isArray(les[0]) ? les.length : 1;
-          if (fusion.count + lesionsToAdd > MAX_LESIONS_PER_TYPE) {
-            break;
-          }
+        if (les) {
           if (Array.isArray(les[0])) {
             for (const lesion of les) {
               sampleLesions.push(lesion);
-              fusion.count++;
             }
           } else {
             sampleLesions.push(les);
-            fusion.count++;
           }
           contributedTypes.add(dtfusionrna);
         }
@@ -314,24 +402,14 @@ async function processSampleMlst(sampleName, mlst, request, tracker) {
       }
       case dtsv: {
         if (!request.svOptions) break;
-        const sv = tracker.get(dtsv);
-        if (sv && sv.count >= MAX_LESIONS_PER_TYPE) {
-          break;
-        }
         const les = filterAndConvertSV(sampleName, m, request.svOptions);
-        if (les && sv) {
-          const lesionsToAdd = Array.isArray(les[0]) ? les.length : 1;
-          if (sv.count + lesionsToAdd > MAX_LESIONS_PER_TYPE) {
-            break;
-          }
+        if (les) {
           if (Array.isArray(les[0])) {
             for (const lesion of les) {
               sampleLesions.push(lesion);
-              sv.count++;
             }
           } else {
             sampleLesions.push(les);
-            sv.count++;
           }
           contributedTypes.add(dtsv);
         }