@sjcrh/proteinpaint-server 2.118.2-0 → 2.118.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -192,7 +192,6 @@ function termdb_test_default() {
192
192
  },
193
193
  geneExpression: {
194
194
  src: "native",
195
- hdf5File: true,
196
195
  file: "files/hg38/TermdbTest/TermdbTest.fpkm.matrix.h5"
197
196
  },
198
197
  topVariablyExpressedGenes: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.118.2-0",
3
+ "version": "2.118.2",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -65,11 +65,11 @@
65
65
  "typescript": "^5.6.3"
66
66
  },
67
67
  "dependencies": {
68
- "@sjcrh/augen": "2.118.2-0",
68
+ "@sjcrh/augen": "2.118.2-1",
69
69
  "@sjcrh/proteinpaint-python": "2.118.0",
70
70
  "@sjcrh/proteinpaint-rust": "2.117.0",
71
71
  "@sjcrh/proteinpaint-shared": "2.118.0",
72
- "@sjcrh/proteinpaint-types": "2.118.1-1",
72
+ "@sjcrh/proteinpaint-types": "2.118.2",
73
73
  "@types/express": "^5.0.0",
74
74
  "@types/express-session": "^1.18.1",
75
75
  "better-sqlite3": "^9.4.1",
@@ -3,6 +3,7 @@ import fs from "fs";
3
3
  import path from "path";
4
4
  import serverconfig from "#src/serverconfig.js";
5
5
  import { run_python } from "@sjcrh/proteinpaint-python";
6
+ import { mayLog } from "#src/helpers.ts";
6
7
  const api = {
7
8
  endpoint: "genesetEnrichment",
8
9
  methods: {
@@ -22,24 +23,24 @@ function init({ genomes }) {
22
23
  const q = req.query;
23
24
  const results = await run_genesetEnrichment_analysis(q, genomes);
24
25
  if (!q.geneset_name) {
25
- if (typeof results != "string")
26
- res.send(results);
27
- else
28
- throw `invalid results type when !req.query.geneset_name`;
29
- } else if (typeof results == "string") {
30
- res.sendFile(results, (err) => {
31
- fs.unlink(results, (del_err) => {
32
- if (del_err) {
33
- console.error("Error deleting file " + results + ":", del_err);
34
- }
35
- });
36
- if (err) {
37
- res.status(404).send("Image not found");
38
- }
39
- });
26
+ if (typeof results != "object")
27
+ throw "gsea result is not object";
28
+ res.send(results);
29
+ return;
40
30
  }
31
+ if (typeof results != "string")
32
+ throw "gsea result is not string";
33
+ res.sendFile(results, (err) => {
34
+ fs.unlink(results, () => {
35
+ });
36
+ if (err) {
37
+ res.status(404).send("Image not found");
38
+ }
39
+ });
41
40
  } catch (e) {
42
41
  res.send({ status: "error", error: e.message || e });
42
+ if (e.stack)
43
+ console.log(e.stack);
43
44
  }
44
45
  };
45
46
  }
@@ -71,17 +72,14 @@ async function run_genesetEnrichment_analysis(q, genomes) {
71
72
  result = JSON.parse(line.replace("image: ", ""));
72
73
  image_found = true;
73
74
  } else {
74
- console.log(line);
75
+ mayLog(line);
75
76
  }
76
77
  }
77
- if (data_found) {
78
+ if (data_found)
78
79
  return result;
79
- } else if (image_found) {
80
- const imagePath = path.join(serverconfig.cachedir, result.image_file);
81
- return imagePath;
82
- } else {
83
- throw ``;
84
- }
80
+ if (image_found)
81
+ return path.join(serverconfig.cachedir, result.image_file);
82
+ throw "data or image not found in gsea output; this should not happen";
85
83
  }
86
84
  export {
87
85
  api
@@ -7,7 +7,6 @@ import serverconfig from "#src/serverconfig.js";
7
7
  import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
8
8
  import { mayLimitSamples } from "#src/mds3.filter.js";
9
9
  import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
10
- import { getResult as getResultGene } from "#src/gene.js";
11
10
  import { TermTypes, NUMERIC_DICTIONARY_TERM } from "#shared/terms.js";
12
11
  import { getData } from "#src/termdb.matrix.js";
13
12
  import { termType2label } from "#shared/terms.js";
@@ -220,7 +219,7 @@ async function validate_query_geneExpression(ds, genome) {
220
219
  return;
221
220
  }
222
221
  if (q.src == "native") {
223
- await validateNative(q, ds, genome);
222
+ await validateNative(q, ds);
224
223
  return;
225
224
  }
226
225
  throw "unknown queries.geneExpression.src";
@@ -256,183 +255,89 @@ async function queryGeneExpression(hdf5_file, geneNames) {
256
255
  throw error;
257
256
  }
258
257
  }
259
- async function validateNative(q, ds, genome) {
260
- if (q.hdf5File === true) {
261
- q.file = path.join(serverconfig.tpmasterdir, q.file);
262
- q.samples = [];
263
- await utils.file_is_readable(q.file);
264
- try {
265
- const vr = await validateHDF5File(q.file);
266
- if (vr.status !== "success")
267
- throw vr.message;
268
- if (!vr.sampleNames?.length)
269
- throw "HDF5 file has no samples";
270
- for (const sn of vr.sampleNames) {
271
- const si = ds.cohort.termdb.q.sampleName2id(sn);
272
- if (si == void 0)
273
- throw "unknown sample from HDF5: " + sn;
274
- q.samples.push(si);
275
- }
276
- console.log(`${ds.label}: HDF5 file validated. Format: ${vr.format}, Samples:`, vr.sampleNames.length);
277
- } catch (error) {
278
- throw `${ds.label}: Failed to validate HDF5 file: ${error}`;
258
+ async function validateNative(q, ds) {
259
+ q.file = path.join(serverconfig.tpmasterdir, q.file);
260
+ q.samples = [];
261
+ await utils.file_is_readable(q.file);
262
+ try {
263
+ const vr = await validateHDF5File(q.file);
264
+ if (vr.status !== "success")
265
+ throw vr.message;
266
+ if (!vr.sampleNames?.length)
267
+ throw "HDF5 file has no samples";
268
+ for (const sn of vr.sampleNames) {
269
+ const si = ds.cohort.termdb.q.sampleName2id(sn);
270
+ if (si == void 0)
271
+ throw "unknown sample from HDF5: " + sn;
272
+ q.samples.push(si);
279
273
  }
280
- q.get = async (param) => {
281
- const limitSamples = await mayLimitSamples(param, q.samples, ds);
282
- if (limitSamples?.size == 0) {
283
- return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
284
- }
285
- const bySampleId = {};
286
- const samples = q.samples || [];
287
- if (limitSamples) {
288
- for (const sid of limitSamples) {
289
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
290
- }
291
- } else {
292
- for (const sid of samples) {
293
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
294
- }
295
- }
296
- const term2sample2value = /* @__PURE__ */ new Map();
297
- const byTermId = {};
298
- const geneNames = [];
299
- for (const geneTerm of param.terms) {
300
- if (geneTerm.gene) {
301
- geneNames.push(geneTerm.gene);
302
- }
303
- }
304
- if (geneNames.length === 0) {
305
- console.log("No genes to query");
306
- return { term2sample2value, byTermId };
307
- }
308
- const time1 = Date.now();
309
- try {
310
- const geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
311
- mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
312
- const genesData = geneData.genes || { [geneNames[0]]: geneData };
313
- for (const geneTerm of param.terms) {
314
- if (!geneTerm.gene)
315
- continue;
316
- const geneResult = genesData[geneTerm.gene];
317
- if (!geneResult) {
318
- console.warn(`No data found for gene ${geneTerm.gene} in the response`);
319
- continue;
320
- }
321
- const samplesData = geneResult.samples || {};
322
- const s2v = {};
323
- for (const [sampleName, value] of Object.entries(samplesData)) {
324
- const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
325
- if (!sampleId)
326
- continue;
327
- if (limitSamples && !limitSamples.has(sampleId))
328
- continue;
329
- s2v[sampleId] = value;
330
- }
331
- if (Object.keys(s2v).length) {
332
- term2sample2value.set(geneTerm.gene, s2v);
333
- }
334
- }
335
- } catch (error) {
336
- console.error(`Error processing batch gene query:`, error);
337
- }
338
- if (term2sample2value.size == 0) {
339
- throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
340
- }
341
- return { term2sample2value, byTermId, bySampleId };
342
- };
343
- } else {
344
- if (!q.file.startsWith(serverconfig.tpmasterdir)) {
345
- q.file = path.join(serverconfig.tpmasterdir, q.file);
274
+ console.log(`${ds.label}: HDF5 file validated. Format: ${vr.format}, Samples:`, vr.sampleNames.length);
275
+ } catch (error) {
276
+ throw `${ds.label}: Failed to validate HDF5 file: ${error}`;
277
+ }
278
+ q.get = async (param) => {
279
+ const limitSamples = await mayLimitSamples(param, q.samples, ds);
280
+ if (limitSamples?.size == 0) {
281
+ return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
346
282
  }
347
- if (!q.samples)
348
- q.samples = [];
349
- await utils.validate_tabixfile(q.file);
350
- q.nochr = await utils.tabix_is_nochr(q.file, null, genome);
351
- q.samples = [];
352
- {
353
- const lines = await utils.get_header_tabix(q.file);
354
- if (!lines[0])
355
- throw "Header line missing from " + q.file;
356
- const l = lines[0].split(" ");
357
- if (l.slice(0, 4).join(" ") != "#chr start stop gene") {
358
- throw "Header line has wrong content for columns 1-4";
283
+ const bySampleId = {};
284
+ const samples = q.samples || [];
285
+ if (limitSamples) {
286
+ for (const sid of limitSamples) {
287
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
359
288
  }
360
- for (let i = 4; i < l.length; i++) {
361
- const id = ds.cohort.termdb.q.sampleName2id(l[i]);
362
- if (id == void 0) {
363
- throw "queries.geneExpression: unknown sample from header: " + l[i];
364
- }
365
- q.samples.push(id);
289
+ } else {
290
+ for (const sid of samples) {
291
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
366
292
  }
367
293
  }
368
- q.get = async (param) => {
369
- const limitSamples = await mayLimitSamples(param, q.samples, ds);
370
- if (limitSamples?.size == 0) {
371
- return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
294
+ const term2sample2value = /* @__PURE__ */ new Map();
295
+ const byTermId = {};
296
+ const geneNames = [];
297
+ for (const geneTerm of param.terms) {
298
+ if (geneTerm.gene) {
299
+ geneNames.push(geneTerm.gene);
372
300
  }
373
- const bySampleId = {};
374
- const samples = q.samples || [];
375
- if (limitSamples) {
376
- for (const sid of limitSamples) {
377
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
378
- }
379
- } else {
380
- for (const sid of samples) {
381
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
382
- }
383
- }
384
- const term2sample2value = /* @__PURE__ */ new Map();
301
+ }
302
+ if (geneNames.length === 0) {
303
+ console.log("No genes to query");
304
+ return { term2sample2value, byTermId };
305
+ }
306
+ const time1 = Date.now();
307
+ try {
308
+ const geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
309
+ mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
310
+ const genesData = geneData.genes || { [geneNames[0]]: geneData };
385
311
  for (const geneTerm of param.terms) {
386
312
  if (!geneTerm.gene)
387
313
  continue;
388
- if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
389
- const re = getResultGene(genome, { input: geneTerm.gene, deep: 1 });
390
- if (!re.gmlst || re.gmlst.length == 0) {
391
- console.warn("Unknown gene:" + geneTerm.gene);
392
- continue;
393
- }
394
- const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
395
- geneTerm.start = i.start;
396
- geneTerm.stop = i.stop;
397
- geneTerm.chr = i.chr;
314
+ const geneResult = genesData[geneTerm.gene];
315
+ if (!geneResult) {
316
+ console.warn(`No data found for gene ${geneTerm.gene} in the response`);
317
+ continue;
398
318
  }
319
+ const samplesData = geneResult.samples || {};
399
320
  const s2v = {};
400
- if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
401
- throw "Missing chr/start/stop";
321
+ for (const [sampleName, value] of Object.entries(samplesData)) {
322
+ const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
323
+ if (!sampleId)
324
+ continue;
325
+ if (limitSamples && !limitSamples.has(sampleId))
326
+ continue;
327
+ s2v[sampleId] = value;
402
328
  }
403
- await utils.get_lines_bigfile({
404
- args: [
405
- q.file,
406
- (q.nochr ? geneTerm.chr.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
407
- ],
408
- callback: (line) => {
409
- const l = line.split(" ");
410
- if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
411
- return;
412
- for (let i = 4; i < l.length; i++) {
413
- const sampleId = samples[i - 4];
414
- if (limitSamples && !limitSamples.has(sampleId))
415
- continue;
416
- if (!l[i])
417
- continue;
418
- const v = Number(l[i]);
419
- if (Number.isNaN(v))
420
- throw "Expression value not number";
421
- s2v[sampleId] = v;
422
- }
423
- }
424
- });
425
329
  if (Object.keys(s2v).length) {
426
330
  term2sample2value.set(geneTerm.gene, s2v);
427
331
  }
428
332
  }
429
- const byTermId = {};
430
- if (term2sample2value.size == 0) {
431
- throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
432
- }
433
- return { term2sample2value, byTermId, bySampleId };
434
- };
435
- }
333
+ } catch (error) {
334
+ console.error(`Error processing batch gene query:`, error);
335
+ }
336
+ if (term2sample2value.size == 0) {
337
+ throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
338
+ }
339
+ return { term2sample2value, byTermId, bySampleId };
340
+ };
436
341
  }
437
342
  export {
438
343
  api,