@sjcrh/proteinpaint-server 2.118.2-1 → 2.118.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +0 -1
- package/package.json +2 -2
- package/routes/genesetEnrichment.js +21 -23
- package/routes/termdb.cluster.js +68 -163
- package/src/app.js +92 -189
package/dataset/termdb.test.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.118.2-1",
|
|
3
|
+
"version": "2.118.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
"@sjcrh/proteinpaint-python": "2.118.0",
|
|
70
70
|
"@sjcrh/proteinpaint-rust": "2.117.0",
|
|
71
71
|
"@sjcrh/proteinpaint-shared": "2.118.0",
|
|
72
|
-
"@sjcrh/proteinpaint-types": "2.118.
|
|
72
|
+
"@sjcrh/proteinpaint-types": "2.118.2",
|
|
73
73
|
"@types/express": "^5.0.0",
|
|
74
74
|
"@types/express-session": "^1.18.1",
|
|
75
75
|
"better-sqlite3": "^9.4.1",
|
package/routes/genesetEnrichment.js
CHANGED
|
@@ -3,6 +3,7 @@ import fs from "fs";
|
|
|
3
3
|
import path from "path";
|
|
4
4
|
import serverconfig from "#src/serverconfig.js";
|
|
5
5
|
import { run_python } from "@sjcrh/proteinpaint-python";
|
|
6
|
+
import { mayLog } from "#src/helpers.ts";
|
|
6
7
|
const api = {
|
|
7
8
|
endpoint: "genesetEnrichment",
|
|
8
9
|
methods: {
|
|
@@ -22,24 +23,24 @@ function init({ genomes }) {
|
|
|
22
23
|
const q = req.query;
|
|
23
24
|
const results = await run_genesetEnrichment_analysis(q, genomes);
|
|
24
25
|
if (!q.geneset_name) {
|
|
25
|
-
if (typeof results != "
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
} else if (typeof results == "string") {
|
|
30
|
-
res.sendFile(results, (err) => {
|
|
31
|
-
fs.unlink(results, (del_err) => {
|
|
32
|
-
if (del_err) {
|
|
33
|
-
console.error("Error deleting file " + results + ":", del_err);
|
|
34
|
-
}
|
|
35
|
-
});
|
|
36
|
-
if (err) {
|
|
37
|
-
res.status(404).send("Image not found");
|
|
38
|
-
}
|
|
39
|
-
});
|
|
26
|
+
if (typeof results != "object")
|
|
27
|
+
throw "gsea result is not object";
|
|
28
|
+
res.send(results);
|
|
29
|
+
return;
|
|
40
30
|
}
|
|
31
|
+
if (typeof results != "string")
|
|
32
|
+
throw "gsea result is not string";
|
|
33
|
+
res.sendFile(results, (err) => {
|
|
34
|
+
fs.unlink(results, () => {
|
|
35
|
+
});
|
|
36
|
+
if (err) {
|
|
37
|
+
res.status(404).send("Image not found");
|
|
38
|
+
}
|
|
39
|
+
});
|
|
41
40
|
} catch (e) {
|
|
42
41
|
res.send({ status: "error", error: e.message || e });
|
|
42
|
+
if (e.stack)
|
|
43
|
+
console.log(e.stack);
|
|
43
44
|
}
|
|
44
45
|
};
|
|
45
46
|
}
|
|
@@ -71,17 +72,14 @@ async function run_genesetEnrichment_analysis(q, genomes) {
|
|
|
71
72
|
result = JSON.parse(line.replace("image: ", ""));
|
|
72
73
|
image_found = true;
|
|
73
74
|
} else {
|
|
74
|
-
|
|
75
|
+
mayLog(line);
|
|
75
76
|
}
|
|
76
77
|
}
|
|
77
|
-
if (data_found)
|
|
78
|
+
if (data_found)
|
|
78
79
|
return result;
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
} else {
|
|
83
|
-
throw ``;
|
|
84
|
-
}
|
|
80
|
+
if (image_found)
|
|
81
|
+
return path.join(serverconfig.cachedir, result.image_file);
|
|
82
|
+
throw "data or image not found in gsea output; this should not happen";
|
|
85
83
|
}
|
|
86
84
|
export {
|
|
87
85
|
api
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -7,7 +7,6 @@ import serverconfig from "#src/serverconfig.js";
|
|
|
7
7
|
import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
|
|
8
8
|
import { mayLimitSamples } from "#src/mds3.filter.js";
|
|
9
9
|
import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
|
|
10
|
-
import { getResult as getResultGene } from "#src/gene.js";
|
|
11
10
|
import { TermTypes, NUMERIC_DICTIONARY_TERM } from "#shared/terms.js";
|
|
12
11
|
import { getData } from "#src/termdb.matrix.js";
|
|
13
12
|
import { termType2label } from "#shared/terms.js";
|
|
@@ -220,7 +219,7 @@ async function validate_query_geneExpression(ds, genome) {
|
|
|
220
219
|
return;
|
|
221
220
|
}
|
|
222
221
|
if (q.src == "native") {
|
|
223
|
-
await validateNative(q, ds
|
|
222
|
+
await validateNative(q, ds);
|
|
224
223
|
return;
|
|
225
224
|
}
|
|
226
225
|
throw "unknown queries.geneExpression.src";
|
|
@@ -256,183 +255,89 @@ async function queryGeneExpression(hdf5_file, geneNames) {
|
|
|
256
255
|
throw error;
|
|
257
256
|
}
|
|
258
257
|
}
|
|
259
|
-
async function validateNative(q, ds
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
q.samples.push(si);
|
|
275
|
-
}
|
|
276
|
-
console.log(`${ds.label}: HDF5 file validated. Format: ${vr.format}, Samples:`, vr.sampleNames.length);
|
|
277
|
-
} catch (error) {
|
|
278
|
-
throw `${ds.label}: Failed to validate HDF5 file: ${error}`;
|
|
258
|
+
async function validateNative(q, ds) {
|
|
259
|
+
q.file = path.join(serverconfig.tpmasterdir, q.file);
|
|
260
|
+
q.samples = [];
|
|
261
|
+
await utils.file_is_readable(q.file);
|
|
262
|
+
try {
|
|
263
|
+
const vr = await validateHDF5File(q.file);
|
|
264
|
+
if (vr.status !== "success")
|
|
265
|
+
throw vr.message;
|
|
266
|
+
if (!vr.sampleNames?.length)
|
|
267
|
+
throw "HDF5 file has no samples";
|
|
268
|
+
for (const sn of vr.sampleNames) {
|
|
269
|
+
const si = ds.cohort.termdb.q.sampleName2id(sn);
|
|
270
|
+
if (si == void 0)
|
|
271
|
+
throw "unknown sample from HDF5: " + sn;
|
|
272
|
+
q.samples.push(si);
|
|
279
273
|
}
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
for (const sid of limitSamples) {
|
|
289
|
-
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
290
|
-
}
|
|
291
|
-
} else {
|
|
292
|
-
for (const sid of samples) {
|
|
293
|
-
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
294
|
-
}
|
|
295
|
-
}
|
|
296
|
-
const term2sample2value = /* @__PURE__ */ new Map();
|
|
297
|
-
const byTermId = {};
|
|
298
|
-
const geneNames = [];
|
|
299
|
-
for (const geneTerm of param.terms) {
|
|
300
|
-
if (geneTerm.gene) {
|
|
301
|
-
geneNames.push(geneTerm.gene);
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
if (geneNames.length === 0) {
|
|
305
|
-
console.log("No genes to query");
|
|
306
|
-
return { term2sample2value, byTermId };
|
|
307
|
-
}
|
|
308
|
-
const time1 = Date.now();
|
|
309
|
-
try {
|
|
310
|
-
const geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
|
|
311
|
-
mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
|
|
312
|
-
const genesData = geneData.genes || { [geneNames[0]]: geneData };
|
|
313
|
-
for (const geneTerm of param.terms) {
|
|
314
|
-
if (!geneTerm.gene)
|
|
315
|
-
continue;
|
|
316
|
-
const geneResult = genesData[geneTerm.gene];
|
|
317
|
-
if (!geneResult) {
|
|
318
|
-
console.warn(`No data found for gene ${geneTerm.gene} in the response`);
|
|
319
|
-
continue;
|
|
320
|
-
}
|
|
321
|
-
const samplesData = geneResult.samples || {};
|
|
322
|
-
const s2v = {};
|
|
323
|
-
for (const [sampleName, value] of Object.entries(samplesData)) {
|
|
324
|
-
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
325
|
-
if (!sampleId)
|
|
326
|
-
continue;
|
|
327
|
-
if (limitSamples && !limitSamples.has(sampleId))
|
|
328
|
-
continue;
|
|
329
|
-
s2v[sampleId] = value;
|
|
330
|
-
}
|
|
331
|
-
if (Object.keys(s2v).length) {
|
|
332
|
-
term2sample2value.set(geneTerm.gene, s2v);
|
|
333
|
-
}
|
|
334
|
-
}
|
|
335
|
-
} catch (error) {
|
|
336
|
-
console.error(`Error processing batch gene query:`, error);
|
|
337
|
-
}
|
|
338
|
-
if (term2sample2value.size == 0) {
|
|
339
|
-
throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
|
|
340
|
-
}
|
|
341
|
-
return { term2sample2value, byTermId, bySampleId };
|
|
342
|
-
};
|
|
343
|
-
} else {
|
|
344
|
-
if (!q.file.startsWith(serverconfig.tpmasterdir)) {
|
|
345
|
-
q.file = path.join(serverconfig.tpmasterdir, q.file);
|
|
274
|
+
console.log(`${ds.label}: HDF5 file validated. Format: ${vr.format}, Samples:`, vr.sampleNames.length);
|
|
275
|
+
} catch (error) {
|
|
276
|
+
throw `${ds.label}: Failed to validate HDF5 file: ${error}`;
|
|
277
|
+
}
|
|
278
|
+
q.get = async (param) => {
|
|
279
|
+
const limitSamples = await mayLimitSamples(param, q.samples, ds);
|
|
280
|
+
if (limitSamples?.size == 0) {
|
|
281
|
+
return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
|
|
346
282
|
}
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
{
|
|
353
|
-
const lines = await utils.get_header_tabix(q.file);
|
|
354
|
-
if (!lines[0])
|
|
355
|
-
throw "Header line missing from " + q.file;
|
|
356
|
-
const l = lines[0].split(" ");
|
|
357
|
-
if (l.slice(0, 4).join(" ") != "#chr start stop gene") {
|
|
358
|
-
throw "Header line has wrong content for columns 1-4";
|
|
283
|
+
const bySampleId = {};
|
|
284
|
+
const samples = q.samples || [];
|
|
285
|
+
if (limitSamples) {
|
|
286
|
+
for (const sid of limitSamples) {
|
|
287
|
+
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
359
288
|
}
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
throw "queries.geneExpression: unknown sample from header: " + l[i];
|
|
364
|
-
}
|
|
365
|
-
q.samples.push(id);
|
|
289
|
+
} else {
|
|
290
|
+
for (const sid of samples) {
|
|
291
|
+
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
366
292
|
}
|
|
367
293
|
}
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
294
|
+
const term2sample2value = /* @__PURE__ */ new Map();
|
|
295
|
+
const byTermId = {};
|
|
296
|
+
const geneNames = [];
|
|
297
|
+
for (const geneTerm of param.terms) {
|
|
298
|
+
if (geneTerm.gene) {
|
|
299
|
+
geneNames.push(geneTerm.gene);
|
|
372
300
|
}
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
}
|
|
384
|
-
const term2sample2value = /* @__PURE__ */ new Map();
|
|
301
|
+
}
|
|
302
|
+
if (geneNames.length === 0) {
|
|
303
|
+
console.log("No genes to query");
|
|
304
|
+
return { term2sample2value, byTermId };
|
|
305
|
+
}
|
|
306
|
+
const time1 = Date.now();
|
|
307
|
+
try {
|
|
308
|
+
const geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
|
|
309
|
+
mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
|
|
310
|
+
const genesData = geneData.genes || { [geneNames[0]]: geneData };
|
|
385
311
|
for (const geneTerm of param.terms) {
|
|
386
312
|
if (!geneTerm.gene)
|
|
387
313
|
continue;
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
continue;
|
|
393
|
-
}
|
|
394
|
-
const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
|
|
395
|
-
geneTerm.start = i.start;
|
|
396
|
-
geneTerm.stop = i.stop;
|
|
397
|
-
geneTerm.chr = i.chr;
|
|
314
|
+
const geneResult = genesData[geneTerm.gene];
|
|
315
|
+
if (!geneResult) {
|
|
316
|
+
console.warn(`No data found for gene ${geneTerm.gene} in the response`);
|
|
317
|
+
continue;
|
|
398
318
|
}
|
|
319
|
+
const samplesData = geneResult.samples || {};
|
|
399
320
|
const s2v = {};
|
|
400
|
-
|
|
401
|
-
|
|
321
|
+
for (const [sampleName, value] of Object.entries(samplesData)) {
|
|
322
|
+
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
323
|
+
if (!sampleId)
|
|
324
|
+
continue;
|
|
325
|
+
if (limitSamples && !limitSamples.has(sampleId))
|
|
326
|
+
continue;
|
|
327
|
+
s2v[sampleId] = value;
|
|
402
328
|
}
|
|
403
|
-
await utils.get_lines_bigfile({
|
|
404
|
-
args: [
|
|
405
|
-
q.file,
|
|
406
|
-
(q.nochr ? geneTerm.chr.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
|
|
407
|
-
],
|
|
408
|
-
callback: (line) => {
|
|
409
|
-
const l = line.split(" ");
|
|
410
|
-
if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
|
|
411
|
-
return;
|
|
412
|
-
for (let i = 4; i < l.length; i++) {
|
|
413
|
-
const sampleId = samples[i - 4];
|
|
414
|
-
if (limitSamples && !limitSamples.has(sampleId))
|
|
415
|
-
continue;
|
|
416
|
-
if (!l[i])
|
|
417
|
-
continue;
|
|
418
|
-
const v = Number(l[i]);
|
|
419
|
-
if (Number.isNaN(v))
|
|
420
|
-
throw "Expression value not number";
|
|
421
|
-
s2v[sampleId] = v;
|
|
422
|
-
}
|
|
423
|
-
}
|
|
424
|
-
});
|
|
425
329
|
if (Object.keys(s2v).length) {
|
|
426
330
|
term2sample2value.set(geneTerm.gene, s2v);
|
|
427
331
|
}
|
|
428
332
|
}
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
}
|
|
435
|
-
|
|
333
|
+
} catch (error) {
|
|
334
|
+
console.error(`Error processing batch gene query:`, error);
|
|
335
|
+
}
|
|
336
|
+
if (term2sample2value.size == 0) {
|
|
337
|
+
throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
|
|
338
|
+
}
|
|
339
|
+
return { term2sample2value, byTermId, bySampleId };
|
|
340
|
+
};
|
|
436
341
|
}
|
|
437
342
|
export {
|
|
438
343
|
api,
|