@sjcrh/proteinpaint-server 2.120.0 → 2.120.2-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +4 -6
- package/package.json +4 -4
- package/routes/termdb.cluster.js +29 -138
- package/src/app.js +36 -155
- package/utils/edge.R +7 -0
package/dataset/termdb.test.js
CHANGED
|
@@ -154,12 +154,6 @@ function termdb_test_default() {
|
|
|
154
154
|
cnv: {
|
|
155
155
|
file: "files/hg38/TermdbTest/TermdbTest_CNV_gene.gz"
|
|
156
156
|
},
|
|
157
|
-
/*
|
|
158
|
-
on the fly cnv calls from gene body probe signals are no longer used
|
|
159
|
-
probe2cnv:{
|
|
160
|
-
file: 'files/hg19/pnet/PNET.probesignals.gz'
|
|
161
|
-
}
|
|
162
|
-
*/
|
|
163
157
|
singleSampleMutation: {
|
|
164
158
|
src: "native",
|
|
165
159
|
sample_id_key: "sample_id",
|
|
@@ -197,6 +191,10 @@ function termdb_test_default() {
|
|
|
197
191
|
topVariablyExpressedGenes: {
|
|
198
192
|
src: "native"
|
|
199
193
|
},
|
|
194
|
+
rnaseqGeneCount: {
|
|
195
|
+
storage_type: "HDF5",
|
|
196
|
+
file: "files/hg38/TermdbTest/TermdbTest.geneCounts.h5"
|
|
197
|
+
},
|
|
200
198
|
WSImages: {
|
|
201
199
|
type: "H&E",
|
|
202
200
|
imageBySampleFolder: "files/hg38/TermdbTest/wsimages"
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.120.0",
|
|
3
|
+
"version": "2.120.2-0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -65,10 +65,10 @@
|
|
|
65
65
|
"typescript": "^5.6.3"
|
|
66
66
|
},
|
|
67
67
|
"dependencies": {
|
|
68
|
-
"@sjcrh/augen": "2.
|
|
68
|
+
"@sjcrh/augen": "2.120.2-0",
|
|
69
69
|
"@sjcrh/proteinpaint-python": "2.118.0",
|
|
70
|
-
"@sjcrh/proteinpaint-rust": "2.120.0",
|
|
71
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
70
|
+
"@sjcrh/proteinpaint-rust": "2.120.2-0",
|
|
71
|
+
"@sjcrh/proteinpaint-shared": "2.120.1",
|
|
72
72
|
"@sjcrh/proteinpaint-types": "2.119.0",
|
|
73
73
|
"@types/express": "^5.0.0",
|
|
74
74
|
"@types/express-session": "^1.18.1",
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -12,7 +12,6 @@ import { getData } from "#src/termdb.matrix.js";
|
|
|
12
12
|
import { termType2label } from "#shared/terms.js";
|
|
13
13
|
import { mayLog } from "#src/helpers.ts";
|
|
14
14
|
import { formatElapsedTime } from "#shared/time.js";
|
|
15
|
-
import { getResult as getResultGene } from "#src/gene.js";
|
|
16
15
|
const api = {
|
|
17
16
|
endpoint: "termdb/cluster",
|
|
18
17
|
methods: {
|
|
@@ -220,26 +219,11 @@ async function validate_query_geneExpression(ds, genome) {
|
|
|
220
219
|
return;
|
|
221
220
|
}
|
|
222
221
|
if (q.src == "native") {
|
|
223
|
-
await validateNative(q, ds, genome);
|
|
222
|
+
await validateNative(q, ds);
|
|
224
223
|
return;
|
|
225
224
|
}
|
|
226
225
|
throw "unknown queries.geneExpression.src";
|
|
227
226
|
}
|
|
228
|
-
async function validateHDF5File(filePath) {
|
|
229
|
-
try {
|
|
230
|
-
const jsonInput = JSON.stringify({
|
|
231
|
-
hdf5_file: filePath
|
|
232
|
-
});
|
|
233
|
-
const result = await run_rust("validateHDF5", jsonInput);
|
|
234
|
-
return JSON.parse(result);
|
|
235
|
-
} catch (error) {
|
|
236
|
-
console.error(`Error validating file: ${error}`);
|
|
237
|
-
return {
|
|
238
|
-
status: "error",
|
|
239
|
-
message: `Validation error`
|
|
240
|
-
};
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
227
|
async function queryGeneExpression(hdf5_file, geneNames) {
|
|
244
228
|
const jsonInput = JSON.stringify({
|
|
245
229
|
hdf5_file,
|
|
@@ -256,20 +240,21 @@ async function queryGeneExpression(hdf5_file, geneNames) {
|
|
|
256
240
|
throw error;
|
|
257
241
|
}
|
|
258
242
|
}
|
|
259
|
-
async function validateNative(q, ds, genome) {
|
|
243
|
+
async function validateNative(q, ds) {
|
|
260
244
|
q.file = path.join(serverconfig.tpmasterdir, q.file);
|
|
261
245
|
q.samples = [];
|
|
262
|
-
await utils.file_is_readable(q.file);
|
|
263
246
|
try {
|
|
264
|
-
|
|
247
|
+
await utils.file_is_readable(q.file);
|
|
248
|
+
const tmp = await run_rust("validateHDF5", JSON.stringify({ hdf5_file: q.file }));
|
|
249
|
+
const vr = JSON.parse(tmp);
|
|
265
250
|
if (vr.status !== "success")
|
|
266
251
|
throw vr.message;
|
|
267
252
|
if (!vr.sampleNames?.length)
|
|
268
|
-
throw "HDF5 file has no samples";
|
|
253
|
+
throw "HDF5 file has no samples, please check file.";
|
|
269
254
|
for (const sn of vr.sampleNames) {
|
|
270
255
|
const si = ds.cohort.termdb.q.sampleName2id(sn);
|
|
271
256
|
if (si == void 0)
|
|
272
|
-
throw
|
|
257
|
+
throw `unknown sample ${sn} from HDF5 ${q.file}`;
|
|
273
258
|
q.samples.push(si);
|
|
274
259
|
}
|
|
275
260
|
console.log(
|
|
@@ -277,97 +262,7 @@ async function validateNative(q, ds, genome) {
|
|
|
277
262
|
vr.sampleNames.length
|
|
278
263
|
);
|
|
279
264
|
} catch (error) {
|
|
280
|
-
|
|
281
|
-
try {
|
|
282
|
-
q.samples = [];
|
|
283
|
-
await utils.validate_tabixfile(q.file);
|
|
284
|
-
q.nochr = await utils.tabix_is_nochr(q.file, null, genome);
|
|
285
|
-
const lines = await utils.get_header_tabix(q.file);
|
|
286
|
-
if (!lines[0])
|
|
287
|
-
throw "Header line missing from " + q.file;
|
|
288
|
-
const l = lines[0].split(" ");
|
|
289
|
-
if (l.slice(0, 4).join(" ") != "#chr start stop gene") {
|
|
290
|
-
throw "Header line has wrong content for columns 1-4";
|
|
291
|
-
}
|
|
292
|
-
for (let i = 4; i < l.length; i++) {
|
|
293
|
-
const id = ds.cohort.termdb.q.sampleName2id(l[i]);
|
|
294
|
-
if (id == void 0) {
|
|
295
|
-
throw "queries.geneExpression: unknown sample from header: " + l[i];
|
|
296
|
-
}
|
|
297
|
-
q.samples.push(id);
|
|
298
|
-
}
|
|
299
|
-
} catch (e) {
|
|
300
|
-
throw `${ds.label} geneExpression file cannot be validated as tabix file`;
|
|
301
|
-
}
|
|
302
|
-
console.log(`${ds.label}: Tabix file successfully initialized. Samples: ${q.samples.length}`);
|
|
303
|
-
q.get = async (param) => {
|
|
304
|
-
const limitSamples = await mayLimitSamples(param, q.samples, ds);
|
|
305
|
-
if (limitSamples?.size == 0) {
|
|
306
|
-
return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
|
|
307
|
-
}
|
|
308
|
-
const bySampleId = {};
|
|
309
|
-
const samples = q.samples || [];
|
|
310
|
-
if (limitSamples) {
|
|
311
|
-
for (const sid of limitSamples) {
|
|
312
|
-
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
313
|
-
}
|
|
314
|
-
} else {
|
|
315
|
-
for (const sid of samples) {
|
|
316
|
-
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
317
|
-
}
|
|
318
|
-
}
|
|
319
|
-
const term2sample2value = /* @__PURE__ */ new Map();
|
|
320
|
-
const byTermId = {};
|
|
321
|
-
for (const geneTerm of param.terms) {
|
|
322
|
-
if (!geneTerm.gene)
|
|
323
|
-
continue;
|
|
324
|
-
if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
|
|
325
|
-
const re = getResultGene(genome, { input: geneTerm.gene, deep: 1 });
|
|
326
|
-
if (!re.gmlst || re.gmlst.length == 0) {
|
|
327
|
-
console.warn("Unknown gene:" + geneTerm.gene);
|
|
328
|
-
continue;
|
|
329
|
-
}
|
|
330
|
-
const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
|
|
331
|
-
geneTerm.start = i.start;
|
|
332
|
-
geneTerm.stop = i.stop;
|
|
333
|
-
geneTerm.chr = i.chr;
|
|
334
|
-
}
|
|
335
|
-
if (!geneTerm.chr || !Number.isInteger(geneTerm.start) || !Number.isInteger(geneTerm.stop)) {
|
|
336
|
-
throw "Missing chr/start/stop";
|
|
337
|
-
}
|
|
338
|
-
const s2v = {};
|
|
339
|
-
await utils.get_lines_bigfile({
|
|
340
|
-
args: [
|
|
341
|
-
q.file,
|
|
342
|
-
(q.nochr ? geneTerm.chr.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
|
|
343
|
-
],
|
|
344
|
-
callback: (line) => {
|
|
345
|
-
const l = line.split(" ");
|
|
346
|
-
if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
|
|
347
|
-
return;
|
|
348
|
-
for (let i = 4; i < l.length; i++) {
|
|
349
|
-
const sampleId = samples[i - 4];
|
|
350
|
-
if (limitSamples && !limitSamples.has(sampleId))
|
|
351
|
-
continue;
|
|
352
|
-
if (!l[i])
|
|
353
|
-
continue;
|
|
354
|
-
const v = Number(l[i]);
|
|
355
|
-
if (Number.isNaN(v))
|
|
356
|
-
throw "Expression value not number";
|
|
357
|
-
s2v[sampleId] = v;
|
|
358
|
-
}
|
|
359
|
-
}
|
|
360
|
-
});
|
|
361
|
-
if (Object.keys(s2v).length) {
|
|
362
|
-
term2sample2value.set(geneTerm.gene, s2v);
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
if (term2sample2value.size == 0) {
|
|
366
|
-
throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
|
|
367
|
-
}
|
|
368
|
-
return { term2sample2value, byTermId, bySampleId };
|
|
369
|
-
};
|
|
370
|
-
return;
|
|
265
|
+
throw `${ds.label}: Failed to validate geneExpression HDF5 file: ${error}`;
|
|
371
266
|
}
|
|
372
267
|
q.get = async (param) => {
|
|
373
268
|
const limitSamples = await mayLimitSamples(param, q.samples, ds);
|
|
@@ -398,34 +293,30 @@ async function validateNative(q, ds, genome) {
|
|
|
398
293
|
return { term2sample2value, byTermId };
|
|
399
294
|
}
|
|
400
295
|
const time1 = Date.now();
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
296
|
+
const geneData = JSON.parse(await queryGeneExpression(q.file, geneNames));
|
|
297
|
+
mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
|
|
298
|
+
const genesData = geneData.genes || { [geneNames[0]]: geneData };
|
|
299
|
+
for (const geneTerm of param.terms) {
|
|
300
|
+
if (!geneTerm.gene)
|
|
301
|
+
continue;
|
|
302
|
+
const geneResult = genesData[geneTerm.gene];
|
|
303
|
+
if (!geneResult) {
|
|
304
|
+
console.warn(`No data found for gene ${geneTerm.gene} in the response`);
|
|
305
|
+
continue;
|
|
306
|
+
}
|
|
307
|
+
const samplesData = geneResult.samples || {};
|
|
308
|
+
const s2v = {};
|
|
309
|
+
for (const [sampleName, value] of Object.entries(samplesData)) {
|
|
310
|
+
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
311
|
+
if (!sampleId)
|
|
407
312
|
continue;
|
|
408
|
-
|
|
409
|
-
if (!geneResult) {
|
|
410
|
-
console.warn(`No data found for gene ${geneTerm.gene} in the response`);
|
|
313
|
+
if (limitSamples && !limitSamples.has(sampleId))
|
|
411
314
|
continue;
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
417
|
-
if (!sampleId)
|
|
418
|
-
continue;
|
|
419
|
-
if (limitSamples && !limitSamples.has(sampleId))
|
|
420
|
-
continue;
|
|
421
|
-
s2v[sampleId] = value;
|
|
422
|
-
}
|
|
423
|
-
if (Object.keys(s2v).length) {
|
|
424
|
-
term2sample2value.set(geneTerm.gene, s2v);
|
|
425
|
-
}
|
|
315
|
+
s2v[sampleId] = value;
|
|
316
|
+
}
|
|
317
|
+
if (Object.keys(s2v).length) {
|
|
318
|
+
term2sample2value.set(geneTerm.gene, s2v);
|
|
426
319
|
}
|
|
427
|
-
} catch (error) {
|
|
428
|
-
console.error(`Error processing batch gene query:`, error);
|
|
429
320
|
}
|
|
430
321
|
if (term2sample2value.size == 0) {
|
|
431
322
|
throw "No data available for the input " + param.terms?.map((g) => g.gene).join(", ");
|