@sjcrh/proteinpaint-server 2.182.2 → 2.183.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/routes/termdb.proteome.js +149 -161
- package/src/app.js +513 -545
- package/src/serverconfig.js +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.182.2",
|
|
3
|
+
"version": "2.183.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -62,11 +62,11 @@
|
|
|
62
62
|
},
|
|
63
63
|
"dependencies": {
|
|
64
64
|
"@sjcrh/augen": "2.181.1",
|
|
65
|
-
"@sjcrh/proteinpaint-python": "2.
|
|
65
|
+
"@sjcrh/proteinpaint-python": "2.183.0",
|
|
66
66
|
"@sjcrh/proteinpaint-r": "2.181.0",
|
|
67
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
68
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
69
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
67
|
+
"@sjcrh/proteinpaint-rust": "2.183.0",
|
|
68
|
+
"@sjcrh/proteinpaint-shared": "2.183.0",
|
|
69
|
+
"@sjcrh/proteinpaint-types": "2.183.1",
|
|
70
70
|
"@types/express": "^5.0.0",
|
|
71
71
|
"@types/express-session": "^1.18.1",
|
|
72
72
|
"better-sqlite3": "^12.4.1",
|
|
package/routes/termdb.proteome.js
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
import path from "path";
|
|
2
1
|
import { termdbProteomePayload } from "#types/checkers";
|
|
3
|
-
import { filterJoin, getWrappedTvslst } from "#shared/filter.js";
|
|
4
2
|
import { get_ds_tdb } from "#src/termdb.js";
|
|
5
|
-
import { get_samples } from "#src/termdb.sql.js";
|
|
6
3
|
import * as utils from "#src/utils.js";
|
|
7
|
-
import serverconfig from "#src/serverconfig.js";
|
|
8
4
|
import { mayLimitSamples } from "#src/mds3.filter.js";
|
|
9
5
|
const api = {
|
|
10
6
|
endpoint: "termdb/proteome",
|
|
@@ -32,11 +28,16 @@ function init({ genomes }) {
|
|
|
32
28
|
const cohorts = [];
|
|
33
29
|
for (const assayName in ds.queries.proteome.assays) {
|
|
34
30
|
const assay = ds.queries.proteome.assays[assayName];
|
|
35
|
-
for (const
|
|
31
|
+
for (const cohort of assay.cohorts) {
|
|
36
32
|
const details = {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
33
|
+
dbfile: ds.queries.proteome.dbfile,
|
|
34
|
+
assayName,
|
|
35
|
+
cohortName: cohort.cohortName,
|
|
36
|
+
cohortControlFilter: cohort.controlFilter,
|
|
37
|
+
cohortCaseFilter: cohort.caseFilter,
|
|
38
|
+
PTMType: assay.PTMType,
|
|
39
|
+
assayColumnIdx: assay.columnIdx,
|
|
40
|
+
assayColumnValue: assay.columnValue
|
|
40
41
|
};
|
|
41
42
|
const tw = {
|
|
42
43
|
$id: "_",
|
|
@@ -46,7 +47,7 @@ function init({ genomes }) {
|
|
|
46
47
|
proteomeDetails: details
|
|
47
48
|
}
|
|
48
49
|
};
|
|
49
|
-
const
|
|
50
|
+
const cohortData = await ds.queries.proteome.get({
|
|
50
51
|
terms: [tw],
|
|
51
52
|
proteomeDetails: details,
|
|
52
53
|
filter: q.filter,
|
|
@@ -54,27 +55,17 @@ function init({ genomes }) {
|
|
|
54
55
|
for: "proteinView",
|
|
55
56
|
__abortSignal: q.__abortSignal
|
|
56
57
|
});
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
filterConfig.length > 1 ? "and" : ""
|
|
61
|
-
) : null;
|
|
62
|
-
let controlSampleIds = /* @__PURE__ */ new Set();
|
|
63
|
-
if (ctlFilter) {
|
|
64
|
-
const controlFilter = filterJoin([q.filter, ctlFilter].filter((f) => !!f));
|
|
65
|
-
const controlSamples = await get_samples({ filter: controlFilter }, ds);
|
|
66
|
-
controlSampleIds = new Set(controlSamples.map((i) => String(i.id)));
|
|
67
|
-
}
|
|
68
|
-
for (const cohortData of allData.allEntries || []) {
|
|
69
|
-
const s2v = cohortData.s2v;
|
|
58
|
+
const controlSampleIds = cohortData.controlSampleIds || /* @__PURE__ */ new Set();
|
|
59
|
+
for (const entry of cohortData.allEntries || []) {
|
|
60
|
+
const s2v = entry.s2v;
|
|
70
61
|
const stats = getCohortStats(s2v, controlSampleIds);
|
|
71
|
-
delete
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
if (assay.mclassOverride)
|
|
77
|
-
cohorts.push(
|
|
62
|
+
delete entry.s2v;
|
|
63
|
+
entry.foldChange = stats.foldChange;
|
|
64
|
+
entry.pValue = stats.pValue;
|
|
65
|
+
entry.testedN = stats.testedN;
|
|
66
|
+
entry.controlN = stats.controlN;
|
|
67
|
+
if (assay.mclassOverride) entry.mclassOverride = assay.mclassOverride;
|
|
68
|
+
cohorts.push(entry);
|
|
78
69
|
}
|
|
79
70
|
}
|
|
80
71
|
}
|
|
@@ -213,186 +204,183 @@ async function validate_query_proteome(ds) {
|
|
|
213
204
|
if (!q.assays) {
|
|
214
205
|
throw "queries.proteome.assays is missing";
|
|
215
206
|
}
|
|
207
|
+
if (!q.dbfile) {
|
|
208
|
+
throw "queries.proteome.dbfile is missing";
|
|
209
|
+
}
|
|
210
|
+
try {
|
|
211
|
+
q.db = utils.connect_db(q.dbfile);
|
|
212
|
+
} catch (e) {
|
|
213
|
+
throw `Cannot connect to proteome db ${q.dbfile}: ${e.message || e}`;
|
|
214
|
+
}
|
|
216
215
|
for (const assayName in q.assays) {
|
|
217
216
|
const assay = q.assays[assayName];
|
|
217
|
+
if (!assay.columnIdx) throw `queries.proteome.assays.${assayName}.columnIdx missing`;
|
|
218
|
+
if (!assay.columnValue) throw `queries.proteome.assays.${assayName}.columnValue missing`;
|
|
218
219
|
if (assay.cohorts) {
|
|
219
220
|
console.log(`Validating assay "${assayName}" with multiple cohorts`);
|
|
220
|
-
for (const
|
|
221
|
-
|
|
222
|
-
if (!cohort.
|
|
223
|
-
throw `Missing
|
|
224
|
-
|
|
225
|
-
|
|
221
|
+
for (const cohort of assay.cohorts) {
|
|
222
|
+
if (!cohort.cohortName) throw `Missing cohortName in queries.proteome.assays.${assayName}.cohorts`;
|
|
223
|
+
if (!cohort.controlFilter)
|
|
224
|
+
throw `Missing controlFilter in queries.proteome.assays.${assayName}.cohorts.${cohort.cohortName}`;
|
|
225
|
+
if (!cohort.caseFilter)
|
|
226
|
+
throw `Missing caseFilter in queries.proteome.assays.${assayName}.cohorts.${cohort.cohortName}`;
|
|
226
227
|
}
|
|
227
228
|
} else {
|
|
228
229
|
throw `Invalid assay structure for "${assayName}". Must have .cohorts`;
|
|
229
230
|
}
|
|
230
231
|
}
|
|
231
|
-
q.getCohort = (proteomeDetails) => {
|
|
232
|
-
const assay = proteomeDetails?.assay;
|
|
233
|
-
const cohort = proteomeDetails?.cohort;
|
|
234
|
-
if (!assay || !cohort) throw "proteomeDetails.{assay,cohort} missing";
|
|
235
|
-
const cohortQuery = q.assays?.[assay]?.cohorts?.[cohort];
|
|
236
|
-
if (!cohortQuery) throw `queries.proteome.assays.${assay}.cohorts.${cohort} missing for the dataset`;
|
|
237
|
-
return cohortQuery;
|
|
238
|
-
};
|
|
239
232
|
q.find = async (arg) => {
|
|
240
233
|
const proteins = arg?.proteins;
|
|
241
234
|
if (!Array.isArray(proteins) || proteins.length == 0) throw "queries.proteome.find arg.proteins[] missing";
|
|
242
|
-
|
|
243
|
-
const cohortQuery = q.getCohort(arg.proteomeDetails);
|
|
244
|
-
return findProteinsInCohort(cohortQuery, proteins);
|
|
245
|
-
}
|
|
246
|
-
return findProteinsAcrossNonPTMCohorts(q, proteins);
|
|
235
|
+
return findProteinsInCohort(q.db, proteins);
|
|
247
236
|
};
|
|
248
237
|
q.get = async (param) => {
|
|
249
238
|
if (!param?.terms?.length) throw "queries.proteome.get param.terms[] missing";
|
|
250
|
-
if (!param.proteomeDetails?.
|
|
251
|
-
throw "queries.proteome.get param.proteomeDetails.{
|
|
252
|
-
|
|
253
|
-
|
|
239
|
+
if (!param.proteomeDetails?.assayName || !param.proteomeDetails?.cohortName)
|
|
240
|
+
throw "queries.proteome.get param.proteomeDetails.{assayName,cohortName} missing";
|
|
241
|
+
if (!param.proteomeDetails?.cohortControlFilter || !param.proteomeDetails?.cohortCaseFilter || !param.proteomeDetails?.assayColumnIdx || !param.proteomeDetails?.assayColumnValue)
|
|
242
|
+
throw "queries.proteome.get param.proteomeDetails.{cohortControlFilter, cohortCaseFilter, assayColumnIdx, assayColumnValue} missing";
|
|
243
|
+
return await getProteomeValuesFromCohort(ds, param);
|
|
254
244
|
};
|
|
255
245
|
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
}
|
|
246
|
+
const columnIdxToName = {
|
|
247
|
+
0: "organism",
|
|
248
|
+
1: "disease",
|
|
249
|
+
2: "tissue",
|
|
250
|
+
3: "brain_region",
|
|
251
|
+
4: "tech1",
|
|
252
|
+
5: "tech2",
|
|
253
|
+
6: "cohort"
|
|
254
|
+
};
|
|
255
|
+
function resolveColumnName(idx) {
|
|
256
|
+
const name = columnIdxToName[idx];
|
|
257
|
+
if (!name) throw `Invalid columnIdx: ${idx}, must be one of ${Object.keys(columnIdxToName).join(",")}`;
|
|
258
|
+
return name;
|
|
270
259
|
}
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
const cols = line.split(" ");
|
|
279
|
-
if (cols[0]?.startsWith("#Unique identifier")) return;
|
|
280
|
-
const identifier = cols[0].trim();
|
|
281
|
-
const proteinName = cols[4].trim();
|
|
282
|
-
list.push(`${proteinName}: ${identifier}`);
|
|
283
|
-
}
|
|
284
|
-
});
|
|
285
|
-
cohort._proteins = list;
|
|
260
|
+
function buildFilterClause(filters) {
|
|
261
|
+
const conditions = [];
|
|
262
|
+
const params = [];
|
|
263
|
+
for (const f of filters) {
|
|
264
|
+
const colName = resolveColumnName(f.columnIdx);
|
|
265
|
+
conditions.push(`${colName} = ?`);
|
|
266
|
+
params.push(f.columnValue);
|
|
286
267
|
}
|
|
268
|
+
return { conditions, params };
|
|
269
|
+
}
|
|
270
|
+
function findProteinsInCohort(db, proteins) {
|
|
287
271
|
const matches = [];
|
|
288
272
|
for (const p of proteins) {
|
|
289
273
|
if (!p) continue;
|
|
290
|
-
const
|
|
291
|
-
for (const
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
matches.push(entry);
|
|
274
|
+
const rows = db.prepare("SELECT DISTINCT gene, identifier FROM proteome_abundance WHERE gene LIKE ? COLLATE NOCASE").all(`%${p}%`);
|
|
275
|
+
for (const row of rows) {
|
|
276
|
+
if (row.gene.toLowerCase().includes(p.toLowerCase())) {
|
|
277
|
+
matches.push(`${row.gene}: ${row.identifier}`);
|
|
295
278
|
}
|
|
296
279
|
}
|
|
297
280
|
}
|
|
298
281
|
return matches;
|
|
299
282
|
}
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
for (const m of matches) unique.add(m);
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
return [...unique];
|
|
283
|
+
function queryDbRows(db, matchColumn, matchValue, filters) {
|
|
284
|
+
console.log(`Querying DB for ${matchColumn}=${matchValue} with filters:`, filters);
|
|
285
|
+
const { conditions, params } = buildFilterClause(filters);
|
|
286
|
+
const allConditions = [`${matchColumn} = ? COLLATE NOCASE`, ...conditions];
|
|
287
|
+
const sql = `SELECT identifier, protein_accession, modsite, gene, sample, value
|
|
288
|
+
FROM proteome_abundance
|
|
289
|
+
WHERE ${allConditions.join(" AND ")}`;
|
|
290
|
+
console.log("Executing SQL:", sql);
|
|
291
|
+
return db.prepare(sql).all(matchValue, ...params);
|
|
313
292
|
}
|
|
314
|
-
async function getProteomeValuesFromCohort(ds,
|
|
315
|
-
const
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
}
|
|
319
|
-
const bySampleId = {};
|
|
320
|
-
const samples = cohort.samples || [];
|
|
321
|
-
if (limitSamples) {
|
|
322
|
-
for (const sid of limitSamples) bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
323
|
-
} else {
|
|
324
|
-
for (const sid of samples) bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
325
|
-
}
|
|
293
|
+
async function getProteomeValuesFromCohort(ds, param) {
|
|
294
|
+
const db = ds.queries.proteome.db;
|
|
295
|
+
const { assayName, cohortName, PTMType, cohortControlFilter, cohortCaseFilter, assayColumnIdx, assayColumnValue } = param.proteomeDetails;
|
|
296
|
+
const assayFilter = [{ columnIdx: assayColumnIdx, columnValue: assayColumnValue }];
|
|
326
297
|
const term2sample2value = /* @__PURE__ */ new Map();
|
|
327
|
-
const { PTMType, cohort: cohortName, assay: assayName } = param.proteomeDetails;
|
|
328
298
|
const allEntries = [];
|
|
299
|
+
const controlSampleIds = /* @__PURE__ */ new Set();
|
|
329
300
|
for (const tw of param.terms) {
|
|
330
301
|
if (!tw) continue;
|
|
331
|
-
const
|
|
332
|
-
const identifier =
|
|
333
|
-
const geneName =
|
|
302
|
+
const fullGeneName = tw.term.name;
|
|
303
|
+
const identifier = fullGeneName.split(":")[1]?.trim();
|
|
304
|
+
const geneName = fullGeneName.split(":")[0]?.trim();
|
|
334
305
|
if (param.for === "proteinView") {
|
|
335
306
|
if (!geneName) throw "invalid term name for proteome query, gene name missing";
|
|
336
307
|
} else {
|
|
337
308
|
if (!identifier || !geneName)
|
|
338
309
|
throw "invalid term name for proteome query, must be in format geneName: uniqueIdentifier";
|
|
339
310
|
}
|
|
340
|
-
const
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
311
|
+
const matchColumn = param.for === "proteinView" ? "gene" : "identifier";
|
|
312
|
+
const matchValue = param.for === "proteinView" ? geneName : identifier;
|
|
313
|
+
const caseRows = queryDbRows(db, matchColumn, matchValue, [...assayFilter, ...cohortCaseFilter]);
|
|
314
|
+
const controlRows = queryDbRows(db, matchColumn, matchValue, [...assayFilter, ...cohortControlFilter]);
|
|
315
|
+
for (const row of controlRows) {
|
|
316
|
+
const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
|
|
317
|
+
if (sid !== void 0) controlSampleIds.add(String(sid));
|
|
318
|
+
}
|
|
319
|
+
const allRows = [...caseRows, ...controlRows];
|
|
320
|
+
const allSampleIds = [];
|
|
321
|
+
for (const row of allRows) {
|
|
322
|
+
const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
|
|
323
|
+
if (sid !== void 0) allSampleIds.push(sid);
|
|
324
|
+
}
|
|
325
|
+
const uniqueSampleIds = [...new Set(allSampleIds)];
|
|
326
|
+
const limitSamples = await mayLimitSamples(param, uniqueSampleIds, ds);
|
|
327
|
+
if (limitSamples?.size == 0) {
|
|
328
|
+
return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
|
|
329
|
+
}
|
|
330
|
+
if (param.for === "proteinView") {
|
|
331
|
+
const entryMap = /* @__PURE__ */ new Map();
|
|
332
|
+
for (const row of allRows) {
|
|
333
|
+
const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
|
|
334
|
+
if (sid === void 0) continue;
|
|
335
|
+
if (limitSamples && !limitSamples.has(sid)) continue;
|
|
336
|
+
if (!entryMap.has(row.identifier)) {
|
|
337
|
+
entryMap.set(row.identifier, {
|
|
338
|
+
uniqueIdentifier: row.identifier,
|
|
365
339
|
assayName,
|
|
366
340
|
cohortName,
|
|
367
341
|
PTMType,
|
|
368
|
-
modSites: PTMType ?
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
//psms: l[5] === undefined || l[5] === '' ? undefined : Number.isNaN(Number(l[5])) ? l[5].trim() : Number(l[5]),
|
|
373
|
-
s2v: rowS2v
|
|
342
|
+
modSites: PTMType ? row.modsite || void 0 : void 0,
|
|
343
|
+
proteinAccession: row.protein_accession,
|
|
344
|
+
geneName: row.gene,
|
|
345
|
+
s2v: {}
|
|
374
346
|
});
|
|
375
|
-
} else {
|
|
376
|
-
for (let i = 9; i < l.length; i++) {
|
|
377
|
-
const sampleId = cohort.samples[i - 9];
|
|
378
|
-
if (limitSamples && !limitSamples.has(sampleId)) continue;
|
|
379
|
-
if (!l[i]) continue;
|
|
380
|
-
const v = Number(l[i]);
|
|
381
|
-
if (Number.isNaN(v)) throw "exp value not number";
|
|
382
|
-
s2v[sampleId] = v;
|
|
383
|
-
}
|
|
384
347
|
}
|
|
348
|
+
entryMap.get(row.identifier).s2v[sid] = row.value;
|
|
385
349
|
}
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
350
|
+
for (const entry of entryMap.values()) allEntries.push(entry);
|
|
351
|
+
} else {
|
|
352
|
+
const s2v = {};
|
|
353
|
+
for (const row of allRows) {
|
|
354
|
+
const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
|
|
355
|
+
if (sid === void 0) continue;
|
|
356
|
+
if (limitSamples && !limitSamples.has(sid)) continue;
|
|
357
|
+
s2v[sid] = row.value;
|
|
358
|
+
}
|
|
359
|
+
if (Object.keys(s2v).length) {
|
|
360
|
+
term2sample2value.set(tw.$id, s2v);
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
const bySampleId = {};
|
|
365
|
+
if (param.for === "proteinView") {
|
|
366
|
+
const sampleIds = /* @__PURE__ */ new Set();
|
|
367
|
+
for (const entry of allEntries) {
|
|
368
|
+
for (const sid of Object.keys(entry.s2v)) sampleIds.add(Number(sid));
|
|
369
|
+
}
|
|
370
|
+
for (const sid of sampleIds) {
|
|
371
|
+
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
|
|
389
372
|
}
|
|
373
|
+
return { allEntries, controlSampleIds, bySampleId };
|
|
390
374
|
}
|
|
391
|
-
if (term2sample2value.size == 0
|
|
375
|
+
if (term2sample2value.size == 0) {
|
|
392
376
|
throw `No data available for: ${param.terms?.map((t) => t.term.name).join(", ")}`;
|
|
393
377
|
}
|
|
394
|
-
|
|
395
|
-
|
|
378
|
+
for (const s2v of term2sample2value.values()) {
|
|
379
|
+
for (const sid of Object.keys(s2v)) {
|
|
380
|
+
bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(Number(sid)) };
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
return { term2sample2value, controlSampleIds, bySampleId };
|
|
396
384
|
}
|
|
397
385
|
export {
|
|
398
386
|
api,
|