@sjcrh/proteinpaint-server 2.140.1 → 2.142.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -265,7 +265,8 @@ function termdb_test_default() {
265
265
  },
266
266
  geneExpression: {
267
267
  src: "native",
268
- file: "files/hg38/TermdbTest/rnaseq/TermdbTest.fpkm.matrix.h5",
268
+ file: "files/hg38/TermdbTest/rnaseq/TermdbTest.fpkm.matrix.new.h5",
269
+ newformat: true,
269
270
  unit: "FPKM"
270
271
  },
271
272
  ssGSEA: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.140.1",
3
+ "version": "2.142.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -62,9 +62,9 @@
62
62
  "@sjcrh/augen": "2.136.0",
63
63
  "@sjcrh/proteinpaint-python": "2.139.1",
64
64
  "@sjcrh/proteinpaint-r": "2.137.2-0",
65
- "@sjcrh/proteinpaint-rust": "2.140.1",
66
- "@sjcrh/proteinpaint-shared": "2.140.1-1",
67
- "@sjcrh/proteinpaint-types": "2.140.1",
65
+ "@sjcrh/proteinpaint-rust": "2.142.0",
66
+ "@sjcrh/proteinpaint-shared": "2.142.0",
67
+ "@sjcrh/proteinpaint-types": "2.142.0",
68
68
  "@types/express": "^5.0.0",
69
69
  "@types/express-session": "^1.18.1",
70
70
  "better-sqlite3": "^9.4.1",
@@ -3,9 +3,7 @@ import { getData } from "../src/termdb.matrix.js";
3
3
  import { run_R } from "@sjcrh/proteinpaint-r";
4
4
  import { mayLog } from "#src/helpers.ts";
5
5
  import { getStdDev } from "#shared/descriptive.stats.js";
6
- import { formatElapsedTime } from "#shared/time.js";
7
6
  const minArrayLength = 3;
8
- const minSD = 0.05;
9
7
  const api = {
10
8
  endpoint: "termdb/correlationVolcano",
11
9
  methods: {
@@ -39,12 +37,15 @@ function init({ genomes }) {
39
37
  };
40
38
  }
41
39
  async function compute(q, ds) {
42
- const terms = [q.featureTw, ...q.variableTwLst];
40
+ if (!q.featureTw.$id)
41
+ throw "featureTw.$id missing";
42
+ if (!ds.cohort.correlationVolcano.feature.termTypes.includes(q.featureTw?.term.type))
43
+ throw "unsupported featureTw.term.type";
43
44
  const data = await getData(
44
45
  {
45
46
  filter: q.filter,
46
47
  filter0: q.filter0,
47
- terms,
48
+ terms: [q.featureTw, ...q.variableTwLst],
48
49
  __protected__: q.__protected__
49
50
  },
50
51
  ds
@@ -56,30 +57,27 @@ async function compute(q, ds) {
56
57
  vtid2array.set(tw.$id, { id: tw.$id, v1: [], v2: [] });
57
58
  }
58
59
  for (const sid in data.samples) {
59
- const featureValue = q.featureTw.$id === void 0 ? void 0 : data.samples[sid][q.featureTw.$id]?.value;
60
+ const featureValue = data.samples[sid][q.featureTw.$id]?.value;
60
61
  if (!Number.isFinite(featureValue))
61
62
  continue;
62
63
  for (const tw of q.variableTwLst) {
63
- const variableValue = tw.$id === void 0 ? void 0 : data.samples[sid][tw.$id]?.value;
64
+ if (!tw.$id)
65
+ throw "variableTwLst[].$id missing";
66
+ const variableValue = data.samples[sid][tw.$id]?.value;
64
67
  if (!Number.isFinite(variableValue))
65
68
  continue;
66
69
  vtid2array.get(tw.$id).v1.push(featureValue);
67
70
  vtid2array.get(tw.$id).v2.push(variableValue);
68
71
  }
69
72
  }
70
- const [acceptedVariables, skippedVariables] = Array.from(vtid2array.values()).reduce(
71
- ([accepted, skipped], t) => {
72
- const grterThanOne = t.v1.length > minArrayLength && t.v2.length > minArrayLength;
73
- const significantSD = getStdDev(t.v1) > minSD && getStdDev(t.v2) > minSD;
74
- const v = grterThanOne && significantSD ? accepted : skipped;
75
- if (v === accepted)
76
- accepted.push(t);
77
- if (v === skipped)
78
- skipped.push({ tw$id: t.id });
79
- return [accepted, skipped];
80
- },
81
- [[], []]
82
- );
73
+ const acceptedVariables = [], skippedVariables = [];
74
+ for (const [tid, v] of vtid2array) {
75
+ if (v.v1.length < minArrayLength || v.v2.length < minArrayLength || getStdDev(v.v1) == 0 || getStdDev(v.v2) == 0) {
76
+ skippedVariables.push({ tw$id: tid });
77
+ continue;
78
+ }
79
+ acceptedVariables.push(v);
80
+ }
83
81
  const result = { skippedVariables, variableItems: [] };
84
82
  if (!acceptedVariables.length)
85
83
  return result;
@@ -91,9 +89,7 @@ async function compute(q, ds) {
91
89
  const output = {
92
90
  terms: JSON.parse(await run_R("corr.R", JSON.stringify(input)))
93
91
  };
94
- const elapsedMs = Date.now() - time1;
95
- const formattedTime = formatElapsedTime(elapsedMs);
96
- mayLog("Time taken to run correlation analysis:", formattedTime);
92
+ mayLog("Time taken to run correlation analysis:", Date.now() - time1);
97
93
  for (const t of output.terms) {
98
94
  const t2 = {
99
95
  tw$id: t.id,
@@ -113,11 +109,18 @@ function validate_correlationVolcano(ds) {
113
109
  return;
114
110
  if (typeof cv.feature != "object")
115
111
  throw "cv.feature not obj";
116
- if (cv.feature.termType == "geneExpression") {
117
- if (!ds.queries?.geneExpression)
118
- throw "cv.feature.termType=geneExpression not supported by ds";
119
- } else {
120
- throw "unknown cv.feature.termType";
112
+ if (!Array.isArray(cv.feature.termTypes))
113
+ throw "cv.feature.termTypes[] not array";
114
+ for (const t of cv.feature.termTypes) {
115
+ if (t == "geneExpression") {
116
+ if (!ds.queries?.geneExpression)
117
+ throw "geneExpression cv.feature is not supported";
118
+ } else if (t == "ssGSEA") {
119
+ if (!ds.queries?.ssGSEA)
120
+ throw "ssGSEA cv.feature is not supported";
121
+ } else {
122
+ throw "unknown cv.feature.termType";
123
+ }
121
124
  }
122
125
  if (typeof cv.variables != "object")
123
126
  throw "cv.variables not obj";
@@ -240,27 +240,60 @@ async function queryGeneExpression(hdf5_file, geneNames) {
240
240
  throw error;
241
241
  }
242
242
  }
243
+ async function queryHDF5(hdf5_file, query) {
244
+ const jsonInput = JSON.stringify({
245
+ hdf5_file,
246
+ query
247
+ });
248
+ try {
249
+ const result = await run_rust("readH5", jsonInput);
250
+ if (!result || result.length === 0) {
251
+ throw new Error("Failed to retrieve expression data: Empty or missing response");
252
+ }
253
+ return result;
254
+ } catch (error) {
255
+ console.error(`Error querying HDF5 for ${query}`);
256
+ throw error;
257
+ }
258
+ }
243
259
  async function validateNative(q, ds) {
244
260
  q.file = path.join(serverconfig.tpmasterdir, q.file);
245
261
  q.samples = [];
246
262
  try {
247
263
  await utils.file_is_readable(q.file);
248
- const tmp = await run_rust("validateHDF5", JSON.stringify({ hdf5_file: q.file }));
264
+ let tmp;
265
+ if (q.newformat) {
266
+ tmp = await run_rust("readH5", JSON.stringify({ hdf5_file: q.file, validate: true }));
267
+ } else {
268
+ tmp = await run_rust("validateHDF5", JSON.stringify({ hdf5_file: q.file }));
269
+ }
249
270
  const vr = JSON.parse(tmp);
250
271
  if (vr.status !== "success")
251
272
  throw vr.message;
252
- if (!vr.sampleNames?.length)
253
- throw "HDF5 file has no samples, please check file.";
254
- for (const sn of vr.sampleNames) {
255
- const si = ds.cohort.termdb.q.sampleName2id(sn);
256
- if (si == void 0)
257
- throw `unknown sample ${sn} from HDF5 ${q.file}`;
258
- q.samples.push(si);
273
+ if (q.newformat) {
274
+ if (!vr.samples?.length)
275
+ throw "HDF5 file has no samples, please check file.";
276
+ for (const sn of vr.samples) {
277
+ const si = ds.cohort.termdb.q.sampleName2id(sn);
278
+ if (si == void 0)
279
+ throw `unknown sample ${sn} from HDF5 ${q.file}`;
280
+ q.samples.push(si);
281
+ }
282
+ console.log(`${ds.label}: geneExpression HDF5 file validated. Format: ${vr.format}, Samples:`, vr.samples.length);
283
+ } else {
284
+ if (!vr.sampleNames?.length)
285
+ throw "HDF5 file has no samples, please check file.";
286
+ for (const sn of vr.sampleNames) {
287
+ const si = ds.cohort.termdb.q.sampleName2id(sn);
288
+ if (si == void 0)
289
+ throw `unknown sample ${sn} from HDF5 ${q.file}`;
290
+ q.samples.push(si);
291
+ }
292
+ console.log(
293
+ `${ds.label}: geneExpression HDF5 file validated. Format: ${vr.format}, Samples:`,
294
+ vr.sampleNames.length
295
+ );
259
296
  }
260
- console.log(
261
- `${ds.label}: geneExpression HDF5 file validated. Format: ${vr.format}, Samples:`,
262
- vr.sampleNames.length
263
- );
264
297
  } catch (error) {
265
298
  throw `${ds.label}: Failed to validate geneExpression HDF5 file: ${error}`;
266
299
  }
@@ -293,34 +326,70 @@ async function validateNative(q, ds) {
293
326
  return { term2sample2value, byTermId };
294
327
  }
295
328
  const time1 = Date.now();
296
- const geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
329
+ let geneData;
330
+ if (q.newformat) {
331
+ geneData = JSON.parse(await queryHDF5(q.file, geneNames));
332
+ } else {
333
+ geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
334
+ }
297
335
  mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
298
- const genesData = geneData.genes || { [geneNames[0]]: geneData };
299
- for (const tw of param.terms) {
300
- if (!tw.term.gene)
301
- continue;
302
- const geneResult = genesData[tw.term.gene];
303
- if (!geneResult) {
304
- console.warn(`No data found for gene ${tw.term.gene} in the response`);
305
- continue;
336
+ if (q.newformat) {
337
+ const genesData = geneData.query_output || {};
338
+ if (!genesData)
339
+ throw "No expression data returned from HDF5 query";
340
+ for (const tw of param.terms) {
341
+ if (!tw.term.gene)
342
+ continue;
343
+ const geneResult = genesData[tw.term.gene];
344
+ if (!geneResult) {
345
+ console.warn(`No data found for gene ${tw.term.gene} in the response`);
346
+ continue;
347
+ }
348
+ const samplesData = geneResult.samples || {};
349
+ const s2v = {};
350
+ for (const sampleName in samplesData) {
351
+ const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
352
+ if (!sampleId)
353
+ continue;
354
+ if (limitSamples && !limitSamples.has(sampleId))
355
+ continue;
356
+ s2v[sampleId] = samplesData[sampleName];
357
+ }
358
+ if (Object.keys(s2v).length) {
359
+ term2sample2value.set(tw.$id, s2v);
360
+ }
361
+ }
362
+ if (term2sample2value.size == 0) {
363
+ throw "No data available for the input " + param.terms?.map((tw) => tw.term.gene).join(", ");
306
364
  }
307
- const samplesData = geneResult.samples || {};
308
- const s2v = {};
309
- for (const [sampleName, value] of Object.entries(samplesData)) {
310
- const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
311
- if (!sampleId)
365
+ } else {
366
+ const genesData = geneData.genes || { [geneNames[0]]: geneData };
367
+ for (const tw of param.terms) {
368
+ if (!tw.term.gene)
312
369
  continue;
313
- if (limitSamples && !limitSamples.has(sampleId))
370
+ const geneResult = genesData[tw.term.gene];
371
+ if (!geneResult) {
372
+ console.warn(`No data found for gene ${tw.term.gene} in the response`);
314
373
  continue;
315
- s2v[sampleId] = value;
374
+ }
375
+ const samplesData = geneResult.samples || {};
376
+ const s2v = {};
377
+ for (const [sampleName, value] of Object.entries(samplesData)) {
378
+ const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
379
+ if (!sampleId)
380
+ continue;
381
+ if (limitSamples && !limitSamples.has(sampleId))
382
+ continue;
383
+ s2v[sampleId] = value;
384
+ }
385
+ if (Object.keys(s2v).length) {
386
+ term2sample2value.set(tw.$id, s2v);
387
+ }
316
388
  }
317
- if (Object.keys(s2v).length) {
318
- term2sample2value.set(tw.$id, s2v);
389
+ if (term2sample2value.size == 0) {
390
+ throw "No data available for the input " + param.terms?.map((tw) => tw.term.gene).join(", ");
319
391
  }
320
392
  }
321
- if (term2sample2value.size == 0) {
322
- throw "No data available for the input " + param.terms?.map((tw) => tw.term.gene).join(", ");
323
- }
324
393
  return { term2sample2value, byTermId, bySampleId };
325
394
  };
326
395
  }
@@ -83,8 +83,7 @@ function nativeValidateQuery(ds) {
83
83
  samples.push(n);
84
84
  }
85
85
  }
86
- const genes = await computeGenes4nativeDs(q, gE.file, samples);
87
- return genes;
86
+ return await computeGenes4nativeDs(q, gE, samples);
88
87
  };
89
88
  }
90
89
  function addTopVEarg(q) {
@@ -146,9 +145,9 @@ function addTopVEarg(q) {
146
145
  }
147
146
  q.arguments = arglst;
148
147
  }
149
- async function computeGenes4nativeDs(q, matrixFile, samples) {
148
+ async function computeGenes4nativeDs(q, gE, samples) {
150
149
  const input_json = {
151
- input_file: matrixFile,
150
+ input_file: gE.file,
152
151
  samples: samples.join(","),
153
152
  filter_extreme_values: q.filter_extreme_values,
154
153
  num_genes: q.maxGenes,
@@ -158,6 +157,9 @@ async function computeGenes4nativeDs(q, matrixFile, samples) {
158
157
  input_json["min_count"] = q.min_count;
159
158
  input_json["min_total_count"] = q.min_total_count;
160
159
  }
160
+ if (gE.newformat) {
161
+ input_json["newformat"] = true;
162
+ }
161
163
  const rust_output = await run_rust("topGeneByExpressionVariance", JSON.stringify(input_json));
162
164
  const rust_output_list = rust_output.split("\n");
163
165
  let output_json;