@sjcrh/proteinpaint-server 2.113.0 → 2.114.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.113.0",
3
+ "version": "2.114.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -65,9 +65,9 @@
65
65
  },
66
66
  "dependencies": {
67
67
  "@sjcrh/augen": "2.113.0",
68
- "@sjcrh/proteinpaint-rust": "2.112.0",
69
- "@sjcrh/proteinpaint-shared": "2.113.0",
70
- "@sjcrh/proteinpaint-types": "2.113.0",
68
+ "@sjcrh/proteinpaint-rust": "2.114.0",
69
+ "@sjcrh/proteinpaint-shared": "2.114.0",
70
+ "@sjcrh/proteinpaint-types": "2.114.0",
71
71
  "@types/express": "^5.0.0",
72
72
  "@types/express-session": "^1.18.1",
73
73
  "better-sqlite3": "^9.4.1",
@@ -1,5 +1,6 @@
1
1
  import path from "path";
2
2
  import run_R from "#src/run_R.js";
3
+ import { run_rust } from "@sjcrh/proteinpaint-rust";
3
4
  import { termdbClusterPayload } from "#types/checkers";
4
5
  import * as utils from "#src/utils.js";
5
6
  import serverconfig from "#src/serverconfig.js";
@@ -223,92 +224,199 @@ async function validate_query_geneExpression(ds, genome) {
223
224
  }
224
225
  throw "unknown queries.geneExpression.src";
225
226
  }
227
+ async function validateHDF5File(filePath) {
228
+ try {
229
+ const jsonInput = JSON.stringify({
230
+ hdf5_file: filePath
231
+ });
232
+ const result = await run_rust("validateHDF5", jsonInput);
233
+ return JSON.parse(result);
234
+ } catch (error) {
235
+ console.error(`Error validating file: ${error}`);
236
+ return {
237
+ status: "error",
238
+ message: `Validation error`
239
+ };
240
+ }
241
+ }
242
+ async function queryGeneExpression(hdf5_file, geneName) {
243
+ const jsonInput = JSON.stringify({
244
+ hdf5_file,
245
+ gene: geneName
246
+ });
247
+ try {
248
+ const result = await run_rust("readHDF5", jsonInput);
249
+ if (!result || Object.keys(result).length === 0) {
250
+ throw new Error("Failed to retrieve expression data: Empty or missing response");
251
+ }
252
+ return result;
253
+ } catch (error) {
254
+ console.error(`Error querying gene expression for ${geneName}`);
255
+ throw error;
256
+ }
257
+ }
226
258
  async function validateNative(q, ds, genome) {
227
- if (!q.file.startsWith(serverconfig.tpmasterdir))
259
+ if (q.hdf5File === true) {
228
260
  q.file = path.join(serverconfig.tpmasterdir, q.file);
229
- if (!q.samples)
230
261
  q.samples = [];
231
- await utils.validate_tabixfile(q.file);
232
- q.nochr = await utils.tabix_is_nochr(q.file, null, genome);
233
- q.samples = [];
234
- {
235
- const lines = await utils.get_header_tabix(q.file);
236
- if (!lines[0])
237
- throw "header line missing from " + q.file;
238
- const l = lines[0].split(" ");
239
- if (l.slice(0, 4).join(" ") != "#chr start stop gene")
240
- throw "header line has wrong content for columns 1-4";
241
- for (let i = 4; i < l.length; i++) {
242
- const id = ds.cohort.termdb.q.sampleName2id(l[i]);
243
- if (id == void 0)
244
- throw "queries.geneExpression: unknown sample from header: " + l[i];
245
- q.samples.push(id);
246
- }
247
- }
248
- q.get = async (param) => {
249
- const limitSamples = await mayLimitSamples(param, q.samples, ds);
250
- if (limitSamples?.size == 0) {
251
- return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
252
- }
253
- const bySampleId = {};
254
- const samples = q.samples || [];
255
- if (limitSamples) {
256
- for (const sid of limitSamples) {
257
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
258
- }
259
- } else {
260
- for (const sid of samples) {
261
- bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
262
+ await utils.file_is_readable(q.file);
263
+ try {
264
+ const vr = await validateHDF5File(q.file);
265
+ if (vr.status !== "success")
266
+ throw vr.message;
267
+ if (!vr.sampleNames?.length)
268
+ throw "HDF5 file has no samples";
269
+ for (const sn of vr.sampleNames) {
270
+ const si = ds.cohort.termdb.q.sampleName2id(sn);
271
+ if (si == void 0)
272
+ throw "unknown sample from HDF5: " + sn;
273
+ q.samples.push(si);
262
274
  }
275
+ console.log(`${ds.label}: HDF5 file validated. Format: ${vr.format}, Samples:`, vr.sampleNames.length);
276
+ } catch (error) {
277
+ throw `${ds.label}: Failed to validate HDF5 file: ${error}`;
263
278
  }
264
- const term2sample2value = /* @__PURE__ */ new Map();
265
- for (const geneTerm of param.terms) {
266
- if (!geneTerm.gene)
267
- continue;
268
- if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
269
- const re = getResultGene(genome, { input: geneTerm.gene, deep: 1 });
270
- if (!re.gmlst || re.gmlst.length == 0) {
271
- console.warn("unknown gene:" + geneTerm.gene);
272
- continue;
279
+ q.get = async (param) => {
280
+ const limitSamples = await mayLimitSamples(param, q.samples, ds);
281
+ if (limitSamples?.size == 0) {
282
+ return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
283
+ }
284
+ const bySampleId = {};
285
+ const samples = q.samples || [];
286
+ if (limitSamples) {
287
+ for (const sid of limitSamples) {
288
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
289
+ }
290
+ } else {
291
+ for (const sid of samples) {
292
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
273
293
  }
274
- const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
275
- geneTerm.start = i.start;
276
- geneTerm.stop = i.stop;
277
- geneTerm.chr = i.chr;
278
294
  }
279
- const s2v = {};
280
- if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop))
281
- throw "missing chr/start/stop";
282
- await utils.get_lines_bigfile({
283
- args: [
284
- q.file,
285
- (q.nochr ? geneTerm.chr.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
286
- ],
287
- callback: (line) => {
288
- const l = line.split(" ");
289
- if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
290
- return;
291
- for (let i = 4; i < l.length; i++) {
292
- const sampleId = samples[i - 4];
293
- if (limitSamples && !limitSamples.has(sampleId))
295
+ const term2sample2value = /* @__PURE__ */ new Map();
296
+ const byTermId = {};
297
+ for (const geneTerm of param.terms) {
298
+ if (!geneTerm.gene)
299
+ continue;
300
+ try {
301
+ const geneQuery = await queryGeneExpression(q.file, geneTerm.gene);
302
+ const geneData = JSON.parse(geneQuery);
303
+ const samplesData = geneData.samples || {};
304
+ const s2v = {};
305
+ for (const [sampleName, value] of Object.entries(samplesData)) {
306
+ const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
307
+ if (!sampleId)
294
308
  continue;
295
- if (!l[i])
309
+ if (limitSamples && !limitSamples.has(sampleId))
296
310
  continue;
297
- const v = Number(l[i]);
298
- if (Number.isNaN(v))
299
- throw "exp value not number";
300
- s2v[sampleId] = v;
311
+ s2v[sampleId] = Number(value);
312
+ }
313
+ console.log(`Gene ${geneTerm.gene} has ${Object.keys(s2v).length} samples with data`);
314
+ if (Object.keys(s2v).length) {
315
+ term2sample2value.set(geneTerm.gene, s2v);
301
316
  }
317
+ } catch (error) {
318
+ console.warn(`Error processing gene ${geneTerm.gene}:`, error);
319
+ continue;
320
+ }
321
+ }
322
+ if (term2sample2value.size == 0) {
323
+ throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
324
+ }
325
+ return { term2sample2value, byTermId, bySampleId };
326
+ };
327
+ } else {
328
+ if (!q.file.startsWith(serverconfig.tpmasterdir)) {
329
+ q.file = path.join(serverconfig.tpmasterdir, q.file);
330
+ }
331
+ if (!q.samples)
332
+ q.samples = [];
333
+ await utils.validate_tabixfile(q.file);
334
+ q.nochr = await utils.tabix_is_nochr(q.file, null, genome);
335
+ q.samples = [];
336
+ {
337
+ const lines = await utils.get_header_tabix(q.file);
338
+ if (!lines[0])
339
+ throw "Header line missing from " + q.file;
340
+ const l = lines[0].split(" ");
341
+ if (l.slice(0, 4).join(" ") != "#chr start stop gene") {
342
+ throw "Header line has wrong content for columns 1-4";
343
+ }
344
+ for (let i = 4; i < l.length; i++) {
345
+ const id = ds.cohort.termdb.q.sampleName2id(l[i]);
346
+ if (id == void 0) {
347
+ throw "queries.geneExpression: unknown sample from header: " + l[i];
302
348
  }
303
- });
304
- if (Object.keys(s2v).length)
305
- term2sample2value.set(geneTerm.gene, s2v);
349
+ q.samples.push(id);
350
+ }
306
351
  }
307
- const byTermId = {};
308
- if (term2sample2value.size == 0)
309
- throw "no data available for the input " + param.terms?.map((g) => g.gene).join(", ");
310
- return { term2sample2value, byTermId, bySampleId };
311
- };
352
+ q.get = async (param) => {
353
+ const limitSamples = await mayLimitSamples(param, q.samples, ds);
354
+ if (limitSamples?.size == 0) {
355
+ return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
356
+ }
357
+ const bySampleId = {};
358
+ const samples = q.samples || [];
359
+ if (limitSamples) {
360
+ for (const sid of limitSamples) {
361
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
362
+ }
363
+ } else {
364
+ for (const sid of samples) {
365
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
366
+ }
367
+ }
368
+ const term2sample2value = /* @__PURE__ */ new Map();
369
+ for (const geneTerm of param.terms) {
370
+ if (!geneTerm.gene)
371
+ continue;
372
+ if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
373
+ const re = getResultGene(genome, { input: geneTerm.gene, deep: 1 });
374
+ if (!re.gmlst || re.gmlst.length == 0) {
375
+ console.warn("Unknown gene:" + geneTerm.gene);
376
+ continue;
377
+ }
378
+ const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
379
+ geneTerm.start = i.start;
380
+ geneTerm.stop = i.stop;
381
+ geneTerm.chr = i.chr;
382
+ }
383
+ const s2v = {};
384
+ if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
385
+ throw "Missing chr/start/stop";
386
+ }
387
+ await utils.get_lines_bigfile({
388
+ args: [
389
+ q.file,
390
+ (q.nochr ? geneTerm.chr.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
391
+ ],
392
+ callback: (line) => {
393
+ const l = line.split(" ");
394
+ if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
395
+ return;
396
+ for (let i = 4; i < l.length; i++) {
397
+ const sampleId = samples[i - 4];
398
+ if (limitSamples && !limitSamples.has(sampleId))
399
+ continue;
400
+ if (!l[i])
401
+ continue;
402
+ const v = Number(l[i]);
403
+ if (Number.isNaN(v))
404
+ throw "Expression value not number";
405
+ s2v[sampleId] = v;
406
+ }
407
+ }
408
+ });
409
+ if (Object.keys(s2v).length) {
410
+ term2sample2value.set(geneTerm.gene, s2v);
411
+ }
412
+ }
413
+ const byTermId = {};
414
+ if (term2sample2value.size == 0) {
415
+ throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
416
+ }
417
+ return { term2sample2value, byTermId, bySampleId };
418
+ };
419
+ }
312
420
  }
313
421
  export {
314
422
  api,
@@ -155,6 +155,8 @@ function addNonDictionaryQueries(c, ds, genome) {
155
155
  details: q.snvindel.details,
156
156
  populations: q.snvindel.populations
157
157
  };
158
+ if (q.snvindel.byisoform?.processTwsInOneQuery)
159
+ q2.snvindel.byisoform = { processTwsInOneQuery: true };
158
160
  }
159
161
  if (q.trackLst) {
160
162
  q2.trackLst = q.trackLst;