@sjcrh/proteinpaint-server 2.112.1-0 → 2.114.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.112.1-0",
3
+ "version": "2.114.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -18,6 +18,9 @@
18
18
  "prestart": "tsx emitImports.js dev > server.js",
19
19
  "start": "tsx watch . /start.js",
20
20
  "test:unit": "tsx emitImports.js unit > serverTests.js && c8 tsx serverTests.js && rm -rf ./cache",
21
+ "precombined:coverage": "tsx emitImports.js unit > serverTests.js",
22
+ "combined:coverage": "closeCoverageKey=test c8 --all --src=proteinpaint/server --experimental-monocart -r=v8 -r=html -r=json -r=markdown-summary -r=markdown-details -o=./.coverage tsx ./coverage.js & ",
23
+ "postcombined:coverage": "rm -rf ./cache",
21
24
  "getconf": "../build/getConfigProp.js",
22
25
  "doc": "../augen/build.sh routes shared/types/routes shared/checkers ../public/docs/server",
23
26
  "mjs": "esbuild \"$DIR/*.ts\" --platform=node --outdir=\"$DIR\" --format=esm",
@@ -49,6 +52,8 @@
49
52
  "c8": "^10.1.3",
50
53
  "esbuild": "^0.19.12",
51
54
  "glob": "^10.4.5",
55
+ "monocart-coverage-reports": "^2.12.1",
56
+ "node-notifier": "^9.0.1",
52
57
  "node-watch": "^0.7.1",
53
58
  "nodemon": "^2.0.19",
54
59
  "prettier": "^2.8.8",
@@ -59,10 +64,10 @@
59
64
  "typescript": "^5.6.3"
60
65
  },
61
66
  "dependencies": {
62
- "@sjcrh/augen": "2.109.1-0",
63
- "@sjcrh/proteinpaint-rust": "2.112.0",
64
- "@sjcrh/proteinpaint-shared": "2.112.1-0",
65
- "@sjcrh/proteinpaint-types": "2.112.1-0",
67
+ "@sjcrh/augen": "2.113.0",
68
+ "@sjcrh/proteinpaint-rust": "2.114.0",
69
+ "@sjcrh/proteinpaint-shared": "2.114.0",
70
+ "@sjcrh/proteinpaint-types": "2.114.0",
66
71
  "@types/express": "^5.0.0",
67
72
  "@types/express-session": "^1.18.1",
68
73
  "better-sqlite3": "^9.4.1",
@@ -196,7 +196,10 @@ async function run_DE(param, ds, term_results, term_results2) {
196
196
  cachedir: serverconfig.cachedir,
197
197
  min_count: param.min_count,
198
198
  min_total_count: param.min_total_count,
199
- storage_type: param.storage_type
199
+ storage_type: param.storage_type,
200
+ DE_method: param.method,
201
+ mds_cutoff: 1e4
202
+ // The MDS image is generated only when the size of the read counts matrix is below this threshold (number of genes * number of samples < mds_cutoff), because MDS generation is very compute-intensive.
200
203
  };
201
204
  if (param.tw) {
202
205
  expression_input.conf1 = [...conf1_group2, ...conf1_group1];
@@ -213,7 +216,7 @@ async function run_DE(param, ds, term_results, term_results2) {
213
216
  }
214
217
  }
215
218
  const sample_size_limit = 8;
216
- if (group1names.length <= sample_size_limit && group2names.length <= sample_size_limit || param.method == "edgeR") {
219
+ if (group1names.length <= sample_size_limit && group2names.length <= sample_size_limit || param.method == "edgeR" || param.method == "limma") {
217
220
  const time12 = (/* @__PURE__ */ new Date()).valueOf();
218
221
  const result2 = JSON.parse(
219
222
  await run_R(path.join(serverconfig.binpath, "utils", "edge.R"), JSON.stringify(expression_input))
@@ -222,21 +225,21 @@ async function run_DE(param, ds, term_results, term_results2) {
222
225
  param.method = "edgeR";
223
226
  const ql_imagePath = path.join(serverconfig.cachedir, result2.edgeR_ql_image_name[0]);
224
227
  mayLog("ql_imagePath:", ql_imagePath);
225
- const mds_imagePath = path.join(serverconfig.cachedir, result2.edgeR_mds_image_name[0]);
226
- mayLog("mds_imagePath:", mds_imagePath);
227
- await readFileAndDelete(mds_imagePath, "mds_image", result2);
228
228
  await readFileAndDelete(ql_imagePath, "ql_image", result2);
229
+ if (result2.edgeR_mds_image_name) {
230
+ const mds_imagePath = path.join(serverconfig.cachedir, result2.edgeR_mds_image_name[0]);
231
+ mayLog("mds_imagePath:", mds_imagePath);
232
+ await readFileAndDelete(mds_imagePath, "mds_image", result2);
233
+ }
234
+ const images = [result2.ql_image];
235
+ if (result2.mds_image)
236
+ images.push(result2.mds_image);
229
237
  return {
230
238
  data: result2.gene_data,
231
239
  sample_size1,
232
240
  sample_size2,
233
241
  method: param.method,
234
- images: [
235
- result2.mds_image,
236
- // MDS image
237
- result2.ql_image
238
- // QL fit image
239
- ]
242
+ images
240
243
  };
241
244
  }
242
245
  const time1 = (/* @__PURE__ */ new Date()).valueOf();
@@ -259,7 +262,7 @@ async function readFileAndDelete(file, key, response) {
259
262
  throw err;
260
263
  });
261
264
  }
262
- async function validate_query_rnaseqGeneCount(ds, genome) {
265
+ async function validate_query_rnaseqGeneCount(ds) {
263
266
  const q = ds.queries.rnaseqGeneCount;
264
267
  if (!q)
265
268
  return;
@@ -278,6 +281,7 @@ async function validate_query_rnaseqGeneCount(ds, genome) {
278
281
  const time1 = (/* @__PURE__ */ new Date()).valueOf();
279
282
  const result = await run_rust("DEanalysis", JSON.stringify(get_samples_from_hdf5));
280
283
  const time2 = (/* @__PURE__ */ new Date()).valueOf();
284
+ mayLog("Time taken to query gene expression:", time2 - time1, "ms");
281
285
  samples = result.split(",");
282
286
  } else
283
287
  throw "unknown storage type:" + ds.queries.rnaseqGeneCount.storage_type;
@@ -1,5 +1,6 @@
1
1
  import path from "path";
2
2
  import run_R from "#src/run_R.js";
3
+ import { run_rust } from "@sjcrh/proteinpaint-rust";
3
4
  import { termdbClusterPayload } from "#types/checkers";
4
5
  import * as utils from "#src/utils.js";
5
6
  import serverconfig from "#src/serverconfig.js";
@@ -67,22 +68,32 @@ async function getResult(q, ds, genome) {
67
68
  _q = JSON.parse(JSON.stringify(q));
68
69
  _q.forClusteringAnalysis = true;
69
70
  }
70
- let term2sample2value, byTermId, bySampleId;
71
+ let term2sample2value, byTermId, bySampleId, skippedSexChrGenes;
71
72
  if (q.dataType == NUMERIC_DICTIONARY_TERM) {
72
73
  ;
73
74
  ({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds, genome));
74
75
  } else {
75
76
  ;
76
- ({ term2sample2value, byTermId, bySampleId } = await ds.queries[q.dataType].get(_q));
77
+ ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries[q.dataType].get(_q));
77
78
  }
78
- const removedHierClusterTerms = [];
79
+ const noValueTerms = [];
79
80
  for (const [term, obj] of term2sample2value) {
80
81
  if (Object.keys(obj).length === 0) {
81
- removedHierClusterTerms.push(term);
82
+ noValueTerms.push(term);
82
83
  term2sample2value.delete(term);
83
84
  delete byTermId[term];
84
85
  }
85
86
  }
87
+ const removedHierClusterTerms = [];
88
+ if (noValueTerms.length) {
89
+ removedHierClusterTerms.push({
90
+ text: `Skipped ${q.dataType == TermTypes.GENE_EXPRESSION ? "genes" : "items"} with no data`,
91
+ lst: noValueTerms
92
+ });
93
+ }
94
+ if (skippedSexChrGenes?.length) {
95
+ removedHierClusterTerms.push({ text: "Skipped sex chromosome genes", lst: skippedSexChrGenes });
96
+ }
86
97
  if (term2sample2value.size == 0)
87
98
  throw "no data";
88
99
  if (term2sample2value.size == 1) {
@@ -213,92 +224,199 @@ async function validate_query_geneExpression(ds, genome) {
213
224
  }
214
225
  throw "unknown queries.geneExpression.src";
215
226
  }
227
+ async function validateHDF5File(filePath) {
228
+ try {
229
+ const jsonInput = JSON.stringify({
230
+ hdf5_file: filePath
231
+ });
232
+ const result = await run_rust("validateHDF5", jsonInput);
233
+ return JSON.parse(result);
234
+ } catch (error) {
235
+ console.error(`Error validating file: ${error}`);
236
+ return {
237
+ status: "error",
238
+ message: `Validation error`
239
+ };
240
+ }
241
+ }
242
+ async function queryGeneExpression(hdf5_file, geneName) {
243
+ const jsonInput = JSON.stringify({
244
+ hdf5_file,
245
+ gene: geneName
246
+ });
247
+ try {
248
+ const result = await run_rust("readHDF5", jsonInput);
249
+ if (!result || Object.keys(result).length === 0) {
250
+ throw new Error("Failed to retrieve expression data: Empty or missing response");
251
+ }
252
+ return result;
253
+ } catch (error) {
254
+ console.error(`Error querying gene expression for ${geneName}`);
255
+ throw error;
256
+ }
257
+ }
216
258
  async function validateNative(q, ds, genome) {
217
- if (!q.file.startsWith(serverconfig.tpmasterdir))
259
+ if (q.hdf5File === true) {
218
260
  q.file = path.join(serverconfig.tpmasterdir, q.file);
219
- if (!q.samples)
220
261
  q.samples = [];
221
- await utils.validate_tabixfile(q.file);
222
- q.nochr = await utils.tabix_is_nochr(q.file, null, genome);
223
- q.samples = [];
224
- {
225
- const lines = await utils.get_header_tabix(q.file);
226
- if (!lines[0])
227
- throw "header line missing from " + q.file;
228
- const l = lines[0].split(" ");
229
- if (l.slice(0, 4).join(" ") != "#chr start stop gene")
230
- throw "header line has wrong content for columns 1-4";
231
- for (let i = 4; i < l.length; i++) {
232
- const id = ds.cohort.termdb.q.sampleName2id(l[i]);
233
- if (id == void 0)
234
- throw "queries.geneExpression: unknown sample from header: " + l[i];
235
- q.samples.push(id);
236
- }
237
- }
238
- q.get = async (param) => {
239
- const limitSamples = await mayLimitSamples(param, q.samples, ds);
240
- if (limitSamples?.size == 0) {
241
- return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
242
- }
243
- const bySampleId = {};
244
- const samples = q.samples || [];
245
- if (limitSamples) {
246
- for (const sid of limitSamples) {
247
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
248
- }
249
- } else {
250
- for (const sid of samples) {
251
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
262
+ await utils.file_is_readable(q.file);
263
+ try {
264
+ const vr = await validateHDF5File(q.file);
265
+ if (vr.status !== "success")
266
+ throw vr.message;
267
+ if (!vr.sampleNames?.length)
268
+ throw "HDF5 file has no samples";
269
+ for (const sn of vr.sampleNames) {
270
+ const si = ds.cohort.termdb.q.sampleName2id(sn);
271
+ if (si == void 0)
272
+ throw "unknown sample from HDF5: " + sn;
273
+ q.samples.push(si);
252
274
  }
275
+ console.log(`${ds.label}: HDF5 file validated. Format: ${vr.format}, Samples:`, vr.sampleNames.length);
276
+ } catch (error) {
277
+ throw `${ds.label}: Failed to validate HDF5 file: ${error}`;
253
278
  }
254
- const term2sample2value = /* @__PURE__ */ new Map();
255
- for (const geneTerm of param.terms) {
256
- if (!geneTerm.gene)
257
- continue;
258
- if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
259
- const re = getResultGene(genome, { input: geneTerm.gene, deep: 1 });
260
- if (!re.gmlst || re.gmlst.length == 0) {
261
- console.warn("unknown gene:" + geneTerm.gene);
262
- continue;
279
+ q.get = async (param) => {
280
+ const limitSamples = await mayLimitSamples(param, q.samples, ds);
281
+ if (limitSamples?.size == 0) {
282
+ return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
283
+ }
284
+ const bySampleId = {};
285
+ const samples = q.samples || [];
286
+ if (limitSamples) {
287
+ for (const sid of limitSamples) {
288
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
289
+ }
290
+ } else {
291
+ for (const sid of samples) {
292
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
263
293
  }
264
- const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
265
- geneTerm.start = i.start;
266
- geneTerm.stop = i.stop;
267
- geneTerm.chr = i.chr;
268
294
  }
269
- const s2v = {};
270
- if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop))
271
- throw "missing chr/start/stop";
272
- await utils.get_lines_bigfile({
273
- args: [
274
- q.file,
275
- (q.nochr ? geneTerm.chr.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
276
- ],
277
- callback: (line) => {
278
- const l = line.split(" ");
279
- if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
280
- return;
281
- for (let i = 4; i < l.length; i++) {
282
- const sampleId = samples[i - 4];
283
- if (limitSamples && !limitSamples.has(sampleId))
295
+ const term2sample2value = /* @__PURE__ */ new Map();
296
+ const byTermId = {};
297
+ for (const geneTerm of param.terms) {
298
+ if (!geneTerm.gene)
299
+ continue;
300
+ try {
301
+ const geneQuery = await queryGeneExpression(q.file, geneTerm.gene);
302
+ const geneData = JSON.parse(geneQuery);
303
+ const samplesData = geneData.samples || {};
304
+ const s2v = {};
305
+ for (const [sampleName, value] of Object.entries(samplesData)) {
306
+ const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
307
+ if (!sampleId)
284
308
  continue;
285
- if (!l[i])
309
+ if (limitSamples && !limitSamples.has(sampleId))
286
310
  continue;
287
- const v = Number(l[i]);
288
- if (Number.isNaN(v))
289
- throw "exp value not number";
290
- s2v[sampleId] = v;
311
+ s2v[sampleId] = Number(value);
312
+ }
313
+ console.log(`Gene ${geneTerm.gene} has ${Object.keys(s2v).length} samples with data`);
314
+ if (Object.keys(s2v).length) {
315
+ term2sample2value.set(geneTerm.gene, s2v);
291
316
  }
317
+ } catch (error) {
318
+ console.warn(`Error processing gene ${geneTerm.gene}:`, error);
319
+ continue;
292
320
  }
293
- });
294
- if (Object.keys(s2v).length)
295
- term2sample2value.set(geneTerm.gene, s2v);
321
+ }
322
+ if (term2sample2value.size == 0) {
323
+ throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
324
+ }
325
+ return { term2sample2value, byTermId, bySampleId };
326
+ };
327
+ } else {
328
+ if (!q.file.startsWith(serverconfig.tpmasterdir)) {
329
+ q.file = path.join(serverconfig.tpmasterdir, q.file);
296
330
  }
297
- const byTermId = {};
298
- if (term2sample2value.size == 0)
299
- throw "no data available for the input " + param.terms?.map((g) => g.gene).join(", ");
300
- return { term2sample2value, byTermId, bySampleId };
301
- };
331
+ if (!q.samples)
332
+ q.samples = [];
333
+ await utils.validate_tabixfile(q.file);
334
+ q.nochr = await utils.tabix_is_nochr(q.file, null, genome);
335
+ q.samples = [];
336
+ {
337
+ const lines = await utils.get_header_tabix(q.file);
338
+ if (!lines[0])
339
+ throw "Header line missing from " + q.file;
340
+ const l = lines[0].split(" ");
341
+ if (l.slice(0, 4).join(" ") != "#chr start stop gene") {
342
+ throw "Header line has wrong content for columns 1-4";
343
+ }
344
+ for (let i = 4; i < l.length; i++) {
345
+ const id = ds.cohort.termdb.q.sampleName2id(l[i]);
346
+ if (id == void 0) {
347
+ throw "queries.geneExpression: unknown sample from header: " + l[i];
348
+ }
349
+ q.samples.push(id);
350
+ }
351
+ }
352
+ q.get = async (param) => {
353
+ const limitSamples = await mayLimitSamples(param, q.samples, ds);
354
+ if (limitSamples?.size == 0) {
355
+ return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
356
+ }
357
+ const bySampleId = {};
358
+ const samples = q.samples || [];
359
+ if (limitSamples) {
360
+ for (const sid of limitSamples) {
361
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
362
+ }
363
+ } else {
364
+ for (const sid of samples) {
365
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
366
+ }
367
+ }
368
+ const term2sample2value = /* @__PURE__ */ new Map();
369
+ for (const geneTerm of param.terms) {
370
+ if (!geneTerm.gene)
371
+ continue;
372
+ if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
373
+ const re = getResultGene(genome, { input: geneTerm.gene, deep: 1 });
374
+ if (!re.gmlst || re.gmlst.length == 0) {
375
+ console.warn("Unknown gene:" + geneTerm.gene);
376
+ continue;
377
+ }
378
+ const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
379
+ geneTerm.start = i.start;
380
+ geneTerm.stop = i.stop;
381
+ geneTerm.chr = i.chr;
382
+ }
383
+ const s2v = {};
384
+ if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
385
+ throw "Missing chr/start/stop";
386
+ }
387
+ await utils.get_lines_bigfile({
388
+ args: [
389
+ q.file,
390
+ (q.nochr ? geneTerm.chr.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
391
+ ],
392
+ callback: (line) => {
393
+ const l = line.split(" ");
394
+ if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
395
+ return;
396
+ for (let i = 4; i < l.length; i++) {
397
+ const sampleId = samples[i - 4];
398
+ if (limitSamples && !limitSamples.has(sampleId))
399
+ continue;
400
+ if (!l[i])
401
+ continue;
402
+ const v = Number(l[i]);
403
+ if (Number.isNaN(v))
404
+ throw "Expression value not number";
405
+ s2v[sampleId] = v;
406
+ }
407
+ }
408
+ });
409
+ if (Object.keys(s2v).length) {
410
+ term2sample2value.set(geneTerm.gene, s2v);
411
+ }
412
+ }
413
+ const byTermId = {};
414
+ if (term2sample2value.size == 0) {
415
+ throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
416
+ }
417
+ return { term2sample2value, byTermId, bySampleId };
418
+ };
419
+ }
302
420
  }
303
421
  export {
304
422
  api,
@@ -155,6 +155,8 @@ function addNonDictionaryQueries(c, ds, genome) {
155
155
  details: q.snvindel.details,
156
156
  populations: q.snvindel.populations
157
157
  };
158
+ if (q.snvindel.byisoform?.processTwsInOneQuery)
159
+ q2.snvindel.byisoform = { processTwsInOneQuery: true };
158
160
  }
159
161
  if (q.trackLst) {
160
162
  q2.trackLst = q.trackLst;