npm - @sjcrh/proteinpaint-server - Versions diffs - 2.129.5 → 2.129.6 - Mend

@sjcrh/proteinpaint-server 2.129.5 → 2.129.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +5 -5
package/routes/gdc.grin2.list.js +125 -53
package/routes/gdc.grin2.run.js +82 -11
package/src/app.js +254 -110

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@sjcrh/proteinpaint-server",
-  "version": "2.129.5",
+  "version": "2.129.6",
   "type": "module",
   "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
   "main": "src/app.js",
@@ -62,10 +62,10 @@
   "dependencies": {
     "@sjcrh/augen": "2.121.0",
     "@sjcrh/proteinpaint-python": "2.118.0",
-    "@sjcrh/proteinpaint-r": "2.129.2",
-    "@sjcrh/proteinpaint-rust": "2.129.2",
-    "@sjcrh/proteinpaint-shared": "2.129.5",
-    "@sjcrh/proteinpaint-types": "2.129.2",
+    "@sjcrh/proteinpaint-r": "2.129.6",
+    "@sjcrh/proteinpaint-rust": "2.129.6",
+    "@sjcrh/proteinpaint-shared": "2.129.6",
+    "@sjcrh/proteinpaint-types": "2.129.6",
     "@types/express": "^5.0.0",
     "@types/express-session": "^1.18.1",
     "better-sqlite3": "^9.4.1",

package/routes/gdc.grin2.list.js CHANGED

@@ -2,7 +2,8 @@ import { gdcGRIN2listPayload } from "#types/checkers";
 import ky from "ky";
 import { joinUrl } from "#shared/joinUrl.js";
 import serverconfig from "#src/serverconfig.js";
-const maxFileNumber = 1e3;
+import { mayLog } from "#src/helpers.ts";
+const mafMaxFileNumber = 1e3, cnvMaxFileNumber = 1e3;
 const allowedWorkflowType = "Aliquot Ensemble Somatic Variant Merging and Masking";
 const maxFileSizeAllowed = 1e6;
 const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 4e8;
@@ -25,48 +26,45 @@ function init({ genomes }) {
       const g = genomes.hg38;
       if (!g)
         throw "hg38 missing";
-      const ds = g.datasets.GDC;
+      const ds = g.datasets?.GDC;
       if (!ds)
         throw "hg38 GDC missing";
-      const payload = await listMafFiles(req.query, ds);
-      res.send(payload);
+      const result = {
+        files: [],
+        filesTotal: 0,
+        maxTotalSizeCompressed: 0,
+        fileCounts: { maf: 0 }
+      };
+      await mayListMafFiles(req.query, result, ds);
+      await mayListCnvFiles(req.query, result, ds);
+      res.send(result);
     } catch (e) {
+      if (e.stack)
+        console.log(e.stack);
       res.send({ status: "error", error: e.message || e });
     }
   };
 }
-async function listMafFiles(q, ds) {
-  const shouldRetrieveMaf = !!q.mafOptions;
-  const experimentalStrategy = q.mafOptions?.experimentalStrategy;
-  if (shouldRetrieveMaf && !experimentalStrategy) {
-    throw "Missing experimentalStrategy parameter for MAF file retrieval";
-  }
-  const dataFormatFilter = {
+async function mayListMafFiles(q, result, ds) {
+  if (!q.mafOptions)
+    return;
+  const filters = {
     op: "and",
-    content: [{ op: "=", content: { field: "data_format", value: "MAF" } }]
+    content: [
+      { op: "=", content: { field: "data_format", value: "MAF" } },
+      { op: "=", content: { field: "experimental_strategy", value: q.mafOptions.experimentalStrategy } },
+      { op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
+      { op: "=", content: { field: "access", value: "open" } }
+    ]
   };
-  let filters;
-  if (shouldRetrieveMaf) {
-    filters = {
-      op: "and",
-      content: [
-        dataFormatFilter,
-        { op: "=", content: { field: "experimental_strategy", value: experimentalStrategy } },
-        { op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
-        { op: "=", content: { field: "access", value: "open" } }
-      ]
-    };
-  } else {
-    throw "At least one file type option must be specified (mafOptions, cnvOptions, or fusionOptions)";
-  }
   const case_filters = { op: "and", content: [] };
   if (q.filter0) {
     case_filters.content.push(q.filter0);
   }
-  const { host } = ds.getHostHeaders(q);
+  const { host, headers } = ds.getHostHeaders(q);
   const body = {
     filters,
-    size: maxFileNumber,
+    size: mafMaxFileNumber,
     fields: [
       "id",
       "file_size",
@@ -79,7 +77,7 @@ async function listMafFiles(q, ds) {
   };
   if (case_filters.content.length)
     body.case_filters = case_filters;
-  const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, json: body });
+  const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, headers, json: body });
   if (!response.ok)
     throw `HTTP Error: ${response.status} ${response.statusText}`;
   const re = await response.json();
@@ -99,7 +97,7 @@ async function listMafFiles(q, ds) {
         fileSize: h.file_size,
         reason: `File size (${h.file_size} bytes) exceeds maximum allowed size (${maxFileSizeAllowed} bytes)`
       });
-      console.log(
+      mayLog(
         `File ${h.id} with a size of ${h.file_size} bytes is larger then the allowed file size. It is excluded from the list.
 If you want to include it, please increase the maxFileSizeAllowed in the code.`
       );
@@ -149,39 +147,113 @@ If you want to include it, please increase the maxFileSizeAllowed in the code.`
         fileCount: caseFiles.length,
         keptFileSize: caseFiles[0].file_size
       });
-      console.log(
-        `Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`
-      );
+      mayLog(`Case ${caseId}: Found ${caseFiles.length} MAF files, keeping largest (${caseFiles[0].file_size} bytes)`);
     } else {
       deduplicatedFiles.push(caseFiles[0]);
     }
   }
   if (duplicatesRemoved > 0) {
-    console.log(
+    mayLog(
       `GRIN2 MAF deduplication: Removed ${duplicatesRemoved} duplicate files, kept ${deduplicatedFiles.length} unique cases`
     );
   }
   deduplicatedFiles.sort((a, b) => b.file_size - a.file_size);
-  const result = {
-    files: deduplicatedFiles,
-    filesTotal: re.data.pagination.total,
-    maxTotalSizeCompressed,
-    fileCounts: {
-      maf: files.length
-    },
-    appliedFilters: {
-      fileTypes: shouldRetrieveMaf ? ["MAF"] : [],
-      experimentalStrategy
-    },
-    deduplicationStats: {
-      originalFileCount: files.length,
-      deduplicatedFileCount: deduplicatedFiles.length,
-      duplicatesRemoved,
-      caseDetails,
-      filteredFiles
-    }
+  result.files.push(...deduplicatedFiles);
+  result.filesTotal = re.data.pagination.total;
+  if (result.fileCounts) {
+    result.fileCounts.maf = files.length;
+  }
+  result.deduplicationStats = {
+    originalFileCount: files.length,
+    deduplicatedFileCount: deduplicatedFiles.length,
+    duplicatesRemoved,
+    caseDetails,
+    filteredFiles
   };
-  return result;
+}
+async function mayListCnvFiles(q, result, ds) {
+  result.cnvFiles = { files: [] };
+  if (!q.cnvOptions) {
+    console.log("No cnvOptions provided, returning empty cnvFiles");
+    return;
+  }
+  const fields = [
+    "cases.samples.tissue_type",
+    "cases.project.project_id",
+    "cases.submitter_id",
+    "cases.case_id",
+    "data_type",
+    "file_id",
+    "file_size",
+    "data_format",
+    "experimental_strategy",
+    "analysis.workflow_type"
+  ];
+  const { host, headers } = ds.getHostHeaders(q);
+  try {
+    const re = await ky.post(joinUrl(host.rest, "files"), {
+      timeout: false,
+      headers,
+      json: {
+        size: cnvMaxFileNumber,
+        fields: fields.join(","),
+        filters: {
+          op: "in",
+          content: {
+            field: "data_type",
+            value: ["Masked Copy Number Segment", "Allele-specific Copy Number Segment"]
+          }
+        }
+      }
+    }).json();
+    console.log("API Response:", {
+      hits: re.data?.hits?.length || 0,
+      firstHit: re.data?.hits?.[0]
+    });
+    if (!Array.isArray(re.data.hits)) {
+      throw new Error("API response data.hits is not an array");
+    }
+    const cnvFiles = [];
+    for (const h of re.data.hits) {
+      console.log("Processing file:", {
+        file_id: h.file_id,
+        data_format: h.data_format,
+        data_type: h.data_type,
+        cases_length: h.cases?.length
+      });
+      if (h.data_format === "TXT") {
+        if (h.data_type === "Masked Copy Number Segment" || h.data_type === "Allele-specific Copy Number Segment") {
+          const c = h.cases?.[0];
+          if (!c) {
+            console.warn(`Skipping file ${h.file_id} - missing case data`);
+            continue;
+          }
+          console.log("Case structure:", {
+            case_id: c.case_id,
+            submitter_id: c.submitter_id,
+            project: c.project,
+            samples: c.samples?.length
+          });
+          const file = {
+            id: h.file_id || h.id,
+            // Handle both field names
+            project_id: c.project?.project_id || "unknown",
+            // Safe access with fallback
+            file_size: h.file_size,
+            case_submitter_id: c.submitter_id,
+            case_uuid: c.case_id,
+            sample_types: c.samples?.map((s) => s.tissue_type).filter(Boolean) || []
+          };
+          cnvFiles.push(file);
+        }
+      }
+    }
+    result.cnvFiles = { files: cnvFiles };
+    console.log(`Successfully processed ${cnvFiles.length} CNV files`);
+  } catch (error) {
+    console.error("Error fetching CNV files:", error);
+    result.cnvFiles = { files: [] };
+  }
 }
 export {
   api,

package/routes/gdc.grin2.run.js CHANGED

@@ -1,8 +1,9 @@
 import { runGRIN2Payload } from "#types/checkers";
-import { run_rust } from "@sjcrh/proteinpaint-rust";
+import { stream_rust } from "@sjcrh/proteinpaint-rust";
 import { run_R } from "@sjcrh/proteinpaint-r";
 import serverconfig from "#src/serverconfig.js";
 import path from "path";
+import { formatElapsedTime } from "@sjcrh/proteinpaint-shared/time.js";
 const api = {
   endpoint: "gdc/runGRIN2",
   methods: {
@@ -16,6 +17,43 @@ const api = {
     }
   }
 };
+function parseJsonlOutput(rustOutput) {
+  const lines = rustOutput.trim().split("\n");
+  const allSuccessfulData = [];
+  let finalSummary = null;
+  for (const line of lines) {
+    const trimmedLine = line.trim();
+    if (trimmedLine) {
+      try {
+        const data = JSON.parse(trimmedLine);
+        if (data.type === "data") {
+          allSuccessfulData.push(data.data);
+        } else if (data.type === "summary") {
+          finalSummary = data;
+          console.log(`[GRIN2] Download complete: ${data.successful_files}/${data.total_files} files successful`);
+          if (data.failed_files > 0) {
+            console.log(`[GRIN2] ${data.failed_files} files failed`);
+          }
+        }
+      } catch (parseError) {
+        console.error("[GRIN2] JSONL parse error:", parseError);
+        console.error("[GRIN2] Problematic line:", trimmedLine);
+      }
+    }
+  }
+  if (!finalSummary) {
+    throw new Error("No summary found in Rust output");
+  }
+  return {
+    successful_data: allSuccessfulData,
+    failed_files: finalSummary.errors || [],
+    summary: {
+      total_files: finalSummary.total_files,
+      successful_files: finalSummary.successful_files,
+      failed_files: finalSummary.failed_files
+    }
+  };
+}
 function init({ genomes }) {
   return async (req, res) => {
     try {
@@ -29,22 +67,54 @@ function init({ genomes }) {
       console.log(`[GRIN2] Request received:`, JSON.stringify(req.query));
       const parsedRequest = req.query;
       console.log(`[GRIN2] Parsed request: ${JSON.stringify(parsedRequest)}`);
-      console.log("[GRIN2] Calling Rust for file processing...");
       const rustInput = JSON.stringify({
         caseFiles: parsedRequest.caseFiles,
         mafOptions: parsedRequest.mafOptions
       });
-      console.log("[GRIN2] Executing Rust function...");
-      const rustResult = await run_rust("gdcGRIN2", rustInput);
+      console.log("[GRIN2] Executing Rust function with streaming...");
+      let rustOutput = "";
+      let buffer = "";
+      const downloadStartTime = Date.now();
+      const streamResult = stream_rust("gdcGRIN2", rustInput, (errors) => {
+        if (errors) {
+          throw new Error(`Rust process failed: ${errors}`);
+        }
+      });
+      if (!streamResult) {
+        throw new Error("Failed to start Rust streaming process");
+      }
+      for await (const chunk of streamResult.rustStream) {
+        const chunkStr = chunk.toString();
+        rustOutput += chunkStr;
+        buffer += chunkStr;
+        const lines = buffer.split("\n");
+        buffer = lines.pop() || "";
+        for (const line of lines) {
+          const trimmedLine = line.trim();
+          if (trimmedLine) {
+            try {
+              const data = JSON.parse(trimmedLine);
+              if (data.type === "summary") {
+                console.log(`[GRIN2] Download complete: ${data.successful_files}/${data.total_files} files successful`);
+                if (data.failed_files > 0) {
+                  console.log(`[GRIN2] ${data.failed_files} files failed`);
+                }
+              }
+            } catch (_parseError) {
+            }
+          }
+        }
+      }
       console.log("[GRIN2] Rust execution completed");
+      const downloadTime = formatElapsedTime(Date.now() - downloadStartTime);
+      console.log(`[GRIN2] Rust processing took ${downloadTime}`);
+      const rustResult = parseJsonlOutput(rustOutput);
       if (!rustResult) {
         throw new Error("Failed to process MAF files: No result from Rust");
       }
-      let parsedRustResult;
+      const parsedRustResult = rustResult;
       let dataForR = [];
       try {
-        parsedRustResult = typeof rustResult === "string" ? JSON.parse(rustResult) : rustResult;
-        console.log(`[GRIN2] Parsed Rust result structure received`);
         if (parsedRustResult.successful_data && Array.isArray(parsedRustResult.successful_data)) {
           dataForR = parsedRustResult.successful_data.flat();
           console.log(`[GRIN2] Extracted ${dataForR.length} records for R script`);
@@ -56,7 +126,7 @@ function init({ genomes }) {
           dataForR = [];
         }
       } catch (parseError) {
-        console.error("[GRIN2] Error parsing Rust result:", parseError);
+        console.error("[GRIN2] Error processing Rust result:", parseError);
         dataForR = [];
       }
       const genedbfile = path.join(serverconfig.tpmasterdir, g.genedb.dbfile);
@@ -68,20 +138,21 @@ function init({ genomes }) {
         lesion: dataForR
         // The mutation string from Rust
       });
-      console.log(`R input: ${rInput}`);
       console.log("[GRIN2] Executing R script...");
+      const rAnalysisTime = Date.now();
       const rResult = await run_R("gdcGRIN2.R", rInput, []);
-      console.log(`[GRIN2] R execution completed, result: ${rResult}`);
+      console.log("[GRIN2] R execution completed");
+      console.log(`[GRIN2] R analysis took ${formatElapsedTime(Date.now() - rAnalysisTime)}`);
       let resultData;
       try {
         resultData = JSON.parse(rResult);
         console.log("[GRIN2] Finished R analysis");
         const pngImg = resultData.png[0];
         const topGeneTable = resultData.topGeneTable || null;
+        console.log("[GRIN2] Total GRIN2 processing time:", formatElapsedTime(Date.now() - downloadStartTime));
         return res.json({ pngImg, topGeneTable, rustResult: parsedRustResult, status: "success" });
       } catch (parseError) {
         console.error("[GRIN2] Error parsing R result:", parseError);
-        console.log("[GRIN2] Raw R result:", rResult);
       }
     } catch (e) {
       console.error("[GRIN2] Error running analysis:", e);