@sjcrh/proteinpaint-server 2.140.1 → 2.141.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +2 -1
- package/package.json +4 -4
- package/routes/correlationVolcano.js +30 -27
- package/routes/termdb.cluster.js +102 -33
- package/src/app.js +152 -83
package/dataset/termdb.test.js
CHANGED
|
@@ -265,7 +265,8 @@ function termdb_test_default() {
|
|
|
265
265
|
},
|
|
266
266
|
geneExpression: {
|
|
267
267
|
src: "native",
|
|
268
|
-
file: "files/hg38/TermdbTest/rnaseq/TermdbTest.fpkm.matrix.h5",
|
|
268
|
+
file: "files/hg38/TermdbTest/rnaseq/TermdbTest.fpkm.matrix.new.h5",
|
|
269
|
+
newformat: true,
|
|
269
270
|
unit: "FPKM"
|
|
270
271
|
},
|
|
271
272
|
ssGSEA: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.140.1",
|
|
3
|
+
"version": "2.141.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -62,9 +62,9 @@
|
|
|
62
62
|
"@sjcrh/augen": "2.136.0",
|
|
63
63
|
"@sjcrh/proteinpaint-python": "2.139.1",
|
|
64
64
|
"@sjcrh/proteinpaint-r": "2.137.2-0",
|
|
65
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
66
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
67
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
65
|
+
"@sjcrh/proteinpaint-rust": "2.141.0",
|
|
66
|
+
"@sjcrh/proteinpaint-shared": "2.141.0",
|
|
67
|
+
"@sjcrh/proteinpaint-types": "2.141.0",
|
|
68
68
|
"@types/express": "^5.0.0",
|
|
69
69
|
"@types/express-session": "^1.18.1",
|
|
70
70
|
"better-sqlite3": "^9.4.1",
|
|
package/routes/correlationVolcano.js
CHANGED
|
@@ -3,9 +3,7 @@ import { getData } from "../src/termdb.matrix.js";
|
|
|
3
3
|
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
4
4
|
import { mayLog } from "#src/helpers.ts";
|
|
5
5
|
import { getStdDev } from "#shared/descriptive.stats.js";
|
|
6
|
-
import { formatElapsedTime } from "#shared/time.js";
|
|
7
6
|
const minArrayLength = 3;
|
|
8
|
-
const minSD = 0.05;
|
|
9
7
|
const api = {
|
|
10
8
|
endpoint: "termdb/correlationVolcano",
|
|
11
9
|
methods: {
|
|
@@ -39,12 +37,15 @@ function init({ genomes }) {
|
|
|
39
37
|
};
|
|
40
38
|
}
|
|
41
39
|
async function compute(q, ds) {
|
|
42
|
-
|
|
40
|
+
if (!q.featureTw.$id)
|
|
41
|
+
throw "featureTw.$id missing";
|
|
42
|
+
if (!ds.cohort.correlationVolcano.feature.termTypes.includes(q.featureTw?.term.type))
|
|
43
|
+
throw "unsupported featureTw.term.type";
|
|
43
44
|
const data = await getData(
|
|
44
45
|
{
|
|
45
46
|
filter: q.filter,
|
|
46
47
|
filter0: q.filter0,
|
|
47
|
-
terms,
|
|
48
|
+
terms: [q.featureTw, ...q.variableTwLst],
|
|
48
49
|
__protected__: q.__protected__
|
|
49
50
|
},
|
|
50
51
|
ds
|
|
@@ -56,30 +57,27 @@ async function compute(q, ds) {
|
|
|
56
57
|
vtid2array.set(tw.$id, { id: tw.$id, v1: [], v2: [] });
|
|
57
58
|
}
|
|
58
59
|
for (const sid in data.samples) {
|
|
59
|
-
const featureValue =
|
|
60
|
+
const featureValue = data.samples[sid][q.featureTw.$id]?.value;
|
|
60
61
|
if (!Number.isFinite(featureValue))
|
|
61
62
|
continue;
|
|
62
63
|
for (const tw of q.variableTwLst) {
|
|
63
|
-
|
|
64
|
+
if (!tw.$id)
|
|
65
|
+
throw "variableTwLst[].$id missing";
|
|
66
|
+
const variableValue = data.samples[sid][tw.$id]?.value;
|
|
64
67
|
if (!Number.isFinite(variableValue))
|
|
65
68
|
continue;
|
|
66
69
|
vtid2array.get(tw.$id).v1.push(featureValue);
|
|
67
70
|
vtid2array.get(tw.$id).v2.push(variableValue);
|
|
68
71
|
}
|
|
69
72
|
}
|
|
70
|
-
const [
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
skipped.push({ tw$id: t.id });
|
|
79
|
-
return [accepted, skipped];
|
|
80
|
-
},
|
|
81
|
-
[[], []]
|
|
82
|
-
);
|
|
73
|
+
const acceptedVariables = [], skippedVariables = [];
|
|
74
|
+
for (const [tid, v] of vtid2array) {
|
|
75
|
+
if (v.v1.length < minArrayLength || v.v2.length < minArrayLength || getStdDev(v.v1) == 0 || getStdDev(v.v2) == 0) {
|
|
76
|
+
skippedVariables.push({ tw$id: tid });
|
|
77
|
+
continue;
|
|
78
|
+
}
|
|
79
|
+
acceptedVariables.push(v);
|
|
80
|
+
}
|
|
83
81
|
const result = { skippedVariables, variableItems: [] };
|
|
84
82
|
if (!acceptedVariables.length)
|
|
85
83
|
return result;
|
|
@@ -91,9 +89,7 @@ async function compute(q, ds) {
|
|
|
91
89
|
const output = {
|
|
92
90
|
terms: JSON.parse(await run_R("corr.R", JSON.stringify(input)))
|
|
93
91
|
};
|
|
94
|
-
|
|
95
|
-
const formattedTime = formatElapsedTime(elapsedMs);
|
|
96
|
-
mayLog("Time taken to run correlation analysis:", formattedTime);
|
|
92
|
+
mayLog("Time taken to run correlation analysis:", Date.now() - time1);
|
|
97
93
|
for (const t of output.terms) {
|
|
98
94
|
const t2 = {
|
|
99
95
|
tw$id: t.id,
|
|
@@ -113,11 +109,18 @@ function validate_correlationVolcano(ds) {
|
|
|
113
109
|
return;
|
|
114
110
|
if (typeof cv.feature != "object")
|
|
115
111
|
throw "cv.feature not obj";
|
|
116
|
-
if (cv.feature.
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
112
|
+
if (!Array.isArray(cv.feature.termTypes))
|
|
113
|
+
throw "cv.feature.termTypes[] not array";
|
|
114
|
+
for (const t of cv.feature.termTypes) {
|
|
115
|
+
if (t == "geneExpression") {
|
|
116
|
+
if (!ds.queries?.geneExpression)
|
|
117
|
+
throw "geneExpression cv.feature is not supported";
|
|
118
|
+
} else if (t == "ssGSEA") {
|
|
119
|
+
if (!ds.queries?.ssGSEA)
|
|
120
|
+
throw "ssGSEA cv.feature is not supported";
|
|
121
|
+
} else {
|
|
122
|
+
throw "unknown cv.feature.termType";
|
|
123
|
+
}
|
|
121
124
|
}
|
|
122
125
|
if (typeof cv.variables != "object")
|
|
123
126
|
throw "cv.variables not obj";
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -240,27 +240,60 @@ async function queryGeneExpression(hdf5_file, geneNames) {
|
|
|
240
240
|
throw error;
|
|
241
241
|
}
|
|
242
242
|
}
|
|
243
|
+
async function queryHDF5(hdf5_file, query) {
|
|
244
|
+
const jsonInput = JSON.stringify({
|
|
245
|
+
hdf5_file,
|
|
246
|
+
query
|
|
247
|
+
});
|
|
248
|
+
try {
|
|
249
|
+
const result = await run_rust("readH5", jsonInput);
|
|
250
|
+
if (!result || result.length === 0) {
|
|
251
|
+
throw new Error("Failed to retrieve expression data: Empty or missing response");
|
|
252
|
+
}
|
|
253
|
+
return result;
|
|
254
|
+
} catch (error) {
|
|
255
|
+
console.error(`Error querying HDF5 for ${query}`);
|
|
256
|
+
throw error;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
243
259
|
async function validateNative(q, ds) {
|
|
244
260
|
q.file = path.join(serverconfig.tpmasterdir, q.file);
|
|
245
261
|
q.samples = [];
|
|
246
262
|
try {
|
|
247
263
|
await utils.file_is_readable(q.file);
|
|
248
|
-
|
|
264
|
+
let tmp;
|
|
265
|
+
if (q.newformat) {
|
|
266
|
+
tmp = await run_rust("readH5", JSON.stringify({ hdf5_file: q.file, validate: true }));
|
|
267
|
+
} else {
|
|
268
|
+
tmp = await run_rust("validateHDF5", JSON.stringify({ hdf5_file: q.file }));
|
|
269
|
+
}
|
|
249
270
|
const vr = JSON.parse(tmp);
|
|
250
271
|
if (vr.status !== "success")
|
|
251
272
|
throw vr.message;
|
|
252
|
-
if (
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
const
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
273
|
+
if (q.newformat) {
|
|
274
|
+
if (!vr.samples?.length)
|
|
275
|
+
throw "HDF5 file has no samples, please check file.";
|
|
276
|
+
for (const sn of vr.samples) {
|
|
277
|
+
const si = ds.cohort.termdb.q.sampleName2id(sn);
|
|
278
|
+
if (si == void 0)
|
|
279
|
+
throw `unknown sample ${sn} from HDF5 ${q.file}`;
|
|
280
|
+
q.samples.push(si);
|
|
281
|
+
}
|
|
282
|
+
console.log(`${ds.label}: geneExpression HDF5 file validated. Format: ${vr.format}, Samples:`, vr.samples.length);
|
|
283
|
+
} else {
|
|
284
|
+
if (!vr.sampleNames?.length)
|
|
285
|
+
throw "HDF5 file has no samples, please check file.";
|
|
286
|
+
for (const sn of vr.sampleNames) {
|
|
287
|
+
const si = ds.cohort.termdb.q.sampleName2id(sn);
|
|
288
|
+
if (si == void 0)
|
|
289
|
+
throw `unknown sample ${sn} from HDF5 ${q.file}`;
|
|
290
|
+
q.samples.push(si);
|
|
291
|
+
}
|
|
292
|
+
console.log(
|
|
293
|
+
`${ds.label}: geneExpression HDF5 file validated. Format: ${vr.format}, Samples:`,
|
|
294
|
+
vr.sampleNames.length
|
|
295
|
+
);
|
|
259
296
|
}
|
|
260
|
-
console.log(
|
|
261
|
-
`${ds.label}: geneExpression HDF5 file validated. Format: ${vr.format}, Samples:`,
|
|
262
|
-
vr.sampleNames.length
|
|
263
|
-
);
|
|
264
297
|
} catch (error) {
|
|
265
298
|
throw `${ds.label}: Failed to validate geneExpression HDF5 file: ${error}`;
|
|
266
299
|
}
|
|
@@ -293,34 +326,70 @@ async function validateNative(q, ds) {
|
|
|
293
326
|
return { term2sample2value, byTermId };
|
|
294
327
|
}
|
|
295
328
|
const time1 = Date.now();
|
|
296
|
-
|
|
329
|
+
let geneData;
|
|
330
|
+
if (q.newformat) {
|
|
331
|
+
geneData = JSON.parse(await queryHDF5(q.file, geneNames));
|
|
332
|
+
} else {
|
|
333
|
+
geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
|
|
334
|
+
}
|
|
297
335
|
mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
if (!
|
|
301
|
-
|
|
302
|
-
const
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
336
|
+
if (q.newformat) {
|
|
337
|
+
const genesData = geneData.query_output || {};
|
|
338
|
+
if (!genesData)
|
|
339
|
+
throw "No expression data returned from HDF5 query";
|
|
340
|
+
for (const tw of param.terms) {
|
|
341
|
+
if (!tw.term.gene)
|
|
342
|
+
continue;
|
|
343
|
+
const geneResult = genesData[tw.term.gene];
|
|
344
|
+
if (!geneResult) {
|
|
345
|
+
console.warn(`No data found for gene ${tw.term.gene} in the response`);
|
|
346
|
+
continue;
|
|
347
|
+
}
|
|
348
|
+
const samplesData = geneResult.samples || {};
|
|
349
|
+
const s2v = {};
|
|
350
|
+
for (const sampleName in samplesData) {
|
|
351
|
+
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
352
|
+
if (!sampleId)
|
|
353
|
+
continue;
|
|
354
|
+
if (limitSamples && !limitSamples.has(sampleId))
|
|
355
|
+
continue;
|
|
356
|
+
s2v[sampleId] = samplesData[sampleName];
|
|
357
|
+
}
|
|
358
|
+
if (Object.keys(s2v).length) {
|
|
359
|
+
term2sample2value.set(tw.$id, s2v);
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
if (term2sample2value.size == 0) {
|
|
363
|
+
throw "No data available for the input " + param.terms?.map((tw) => tw.term.gene).join(", ");
|
|
306
364
|
}
|
|
307
|
-
|
|
308
|
-
const
|
|
309
|
-
for (const
|
|
310
|
-
|
|
311
|
-
if (!sampleId)
|
|
365
|
+
} else {
|
|
366
|
+
const genesData = geneData.genes || { [geneNames[0]]: geneData };
|
|
367
|
+
for (const tw of param.terms) {
|
|
368
|
+
if (!tw.term.gene)
|
|
312
369
|
continue;
|
|
313
|
-
|
|
370
|
+
const geneResult = genesData[tw.term.gene];
|
|
371
|
+
if (!geneResult) {
|
|
372
|
+
console.warn(`No data found for gene ${tw.term.gene} in the response`);
|
|
314
373
|
continue;
|
|
315
|
-
|
|
374
|
+
}
|
|
375
|
+
const samplesData = geneResult.samples || {};
|
|
376
|
+
const s2v = {};
|
|
377
|
+
for (const [sampleName, value] of Object.entries(samplesData)) {
|
|
378
|
+
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
379
|
+
if (!sampleId)
|
|
380
|
+
continue;
|
|
381
|
+
if (limitSamples && !limitSamples.has(sampleId))
|
|
382
|
+
continue;
|
|
383
|
+
s2v[sampleId] = value;
|
|
384
|
+
}
|
|
385
|
+
if (Object.keys(s2v).length) {
|
|
386
|
+
term2sample2value.set(tw.$id, s2v);
|
|
387
|
+
}
|
|
316
388
|
}
|
|
317
|
-
if (
|
|
318
|
-
|
|
389
|
+
if (term2sample2value.size == 0) {
|
|
390
|
+
throw "No data available for the input " + param.terms?.map((tw) => tw.term.gene).join(", ");
|
|
319
391
|
}
|
|
320
392
|
}
|
|
321
|
-
if (term2sample2value.size == 0) {
|
|
322
|
-
throw "No data available for the input " + param.terms?.map((tw) => tw.term.gene).join(", ");
|
|
323
|
-
}
|
|
324
393
|
return { term2sample2value, byTermId, bySampleId };
|
|
325
394
|
};
|
|
326
395
|
}
|