@sjcrh/proteinpaint-server 2.182.2 → 2.183.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.182.2",
3
+ "version": "2.183.1",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -62,11 +62,11 @@
62
62
  },
63
63
  "dependencies": {
64
64
  "@sjcrh/augen": "2.181.1",
65
- "@sjcrh/proteinpaint-python": "2.181.0",
65
+ "@sjcrh/proteinpaint-python": "2.183.0",
66
66
  "@sjcrh/proteinpaint-r": "2.181.0",
67
- "@sjcrh/proteinpaint-rust": "2.182.2",
68
- "@sjcrh/proteinpaint-shared": "2.182.1",
69
- "@sjcrh/proteinpaint-types": "2.182.1",
67
+ "@sjcrh/proteinpaint-rust": "2.183.0",
68
+ "@sjcrh/proteinpaint-shared": "2.183.0",
69
+ "@sjcrh/proteinpaint-types": "2.183.1",
70
70
  "@types/express": "^5.0.0",
71
71
  "@types/express-session": "^1.18.1",
72
72
  "better-sqlite3": "^12.4.1",
@@ -1,10 +1,6 @@
1
- import path from "path";
2
1
  import { termdbProteomePayload } from "#types/checkers";
3
- import { filterJoin, getWrappedTvslst } from "#shared/filter.js";
4
2
  import { get_ds_tdb } from "#src/termdb.js";
5
- import { get_samples } from "#src/termdb.sql.js";
6
3
  import * as utils from "#src/utils.js";
7
- import serverconfig from "#src/serverconfig.js";
8
4
  import { mayLimitSamples } from "#src/mds3.filter.js";
9
5
  const api = {
10
6
  endpoint: "termdb/proteome",
@@ -32,11 +28,16 @@ function init({ genomes }) {
32
28
  const cohorts = [];
33
29
  for (const assayName in ds.queries.proteome.assays) {
34
30
  const assay = ds.queries.proteome.assays[assayName];
35
- for (const cohortName in assay.cohorts || {}) {
31
+ for (const cohort of assay.cohorts) {
36
32
  const details = {
37
- assay: assayName,
38
- cohort: cohortName,
39
- PTMType: assay.PTMType
33
+ dbfile: ds.queries.proteome.dbfile,
34
+ assayName,
35
+ cohortName: cohort.cohortName,
36
+ cohortControlFilter: cohort.controlFilter,
37
+ cohortCaseFilter: cohort.caseFilter,
38
+ PTMType: assay.PTMType,
39
+ assayColumnIdx: assay.columnIdx,
40
+ assayColumnValue: assay.columnValue
40
41
  };
41
42
  const tw = {
42
43
  $id: "_",
@@ -46,7 +47,7 @@ function init({ genomes }) {
46
47
  proteomeDetails: details
47
48
  }
48
49
  };
49
- const allData = await ds.queries.proteome.get({
50
+ const cohortData = await ds.queries.proteome.get({
50
51
  terms: [tw],
51
52
  proteomeDetails: details,
52
53
  filter: q.filter,
@@ -54,27 +55,17 @@ function init({ genomes }) {
54
55
  for: "proteinView",
55
56
  __abortSignal: q.__abortSignal
56
57
  });
57
- const filterConfig = assay.cohorts[cohortName]?.ctlFilter;
58
- const ctlFilter = filterConfig && Array.isArray(filterConfig) && filterConfig.length ? getWrappedTvslst(
59
- filterConfig.map((tvs) => ({ type: "tvs", tvs })),
60
- filterConfig.length > 1 ? "and" : ""
61
- ) : null;
62
- let controlSampleIds = /* @__PURE__ */ new Set();
63
- if (ctlFilter) {
64
- const controlFilter = filterJoin([q.filter, ctlFilter].filter((f) => !!f));
65
- const controlSamples = await get_samples({ filter: controlFilter }, ds);
66
- controlSampleIds = new Set(controlSamples.map((i) => String(i.id)));
67
- }
68
- for (const cohortData of allData.allEntries || []) {
69
- const s2v = cohortData.s2v;
58
+ const controlSampleIds = cohortData.controlSampleIds || /* @__PURE__ */ new Set();
59
+ for (const entry of cohortData.allEntries || []) {
60
+ const s2v = entry.s2v;
70
61
  const stats = getCohortStats(s2v, controlSampleIds);
71
- delete cohortData.s2v;
72
- cohortData.foldChange = stats.foldChange;
73
- cohortData.pValue = stats.pValue;
74
- cohortData.testedN = stats.testedN;
75
- cohortData.controlN = stats.controlN;
76
- if (assay.mclassOverride) cohortData.mclassOverride = assay.mclassOverride;
77
- cohorts.push(cohortData);
62
+ delete entry.s2v;
63
+ entry.foldChange = stats.foldChange;
64
+ entry.pValue = stats.pValue;
65
+ entry.testedN = stats.testedN;
66
+ entry.controlN = stats.controlN;
67
+ if (assay.mclassOverride) entry.mclassOverride = assay.mclassOverride;
68
+ cohorts.push(entry);
78
69
  }
79
70
  }
80
71
  }
@@ -213,186 +204,183 @@ async function validate_query_proteome(ds) {
213
204
  if (!q.assays) {
214
205
  throw "queries.proteome.assays is missing";
215
206
  }
207
+ if (!q.dbfile) {
208
+ throw "queries.proteome.dbfile is missing";
209
+ }
210
+ try {
211
+ q.db = utils.connect_db(q.dbfile);
212
+ } catch (e) {
213
+ throw `Cannot connect to proteome db ${q.dbfile}: ${e.message || e}`;
214
+ }
216
215
  for (const assayName in q.assays) {
217
216
  const assay = q.assays[assayName];
217
+ if (!assay.columnIdx) throw `queries.proteome.assays.${assayName}.columnIdx missing`;
218
+ if (!assay.columnValue) throw `queries.proteome.assays.${assayName}.columnValue missing`;
218
219
  if (assay.cohorts) {
219
220
  console.log(`Validating assay "${assayName}" with multiple cohorts`);
220
- for (const cohortName in assay.cohorts) {
221
- const cohort = assay.cohorts[cohortName];
222
- if (!cohort.file) {
223
- throw `Missing file in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
224
- }
225
- await validateCohortFile(ds, assayName, cohortName, cohort);
221
+ for (const cohort of assay.cohorts) {
222
+ if (!cohort.cohortName) throw `Missing cohortName in queries.proteome.assays.${assayName}.cohorts`;
223
+ if (!cohort.controlFilter)
224
+ throw `Missing controlFilter in queries.proteome.assays.${assayName}.cohorts.${cohort.cohortName}`;
225
+ if (!cohort.caseFilter)
226
+ throw `Missing caseFilter in queries.proteome.assays.${assayName}.cohorts.${cohort.cohortName}`;
226
227
  }
227
228
  } else {
228
229
  throw `Invalid assay structure for "${assayName}". Must have .cohorts`;
229
230
  }
230
231
  }
231
- q.getCohort = (proteomeDetails) => {
232
- const assay = proteomeDetails?.assay;
233
- const cohort = proteomeDetails?.cohort;
234
- if (!assay || !cohort) throw "proteomeDetails.{assay,cohort} missing";
235
- const cohortQuery = q.assays?.[assay]?.cohorts?.[cohort];
236
- if (!cohortQuery) throw `queries.proteome.assays.${assay}.cohorts.${cohort} missing for the dataset`;
237
- return cohortQuery;
238
- };
239
232
  q.find = async (arg) => {
240
233
  const proteins = arg?.proteins;
241
234
  if (!Array.isArray(proteins) || proteins.length == 0) throw "queries.proteome.find arg.proteins[] missing";
242
- if (arg?.proteomeDetails) {
243
- const cohortQuery = q.getCohort(arg.proteomeDetails);
244
- return findProteinsInCohort(cohortQuery, proteins);
245
- }
246
- return findProteinsAcrossNonPTMCohorts(q, proteins);
235
+ return findProteinsInCohort(q.db, proteins);
247
236
  };
248
237
  q.get = async (param) => {
249
238
  if (!param?.terms?.length) throw "queries.proteome.get param.terms[] missing";
250
- if (!param.proteomeDetails?.assay || !param.proteomeDetails?.cohort)
251
- throw "queries.proteome.get param.proteomeDetails.{assay,cohort} missing";
252
- const cohortQuery = q.getCohort(param.proteomeDetails);
253
- return getProteomeValuesFromCohort(ds, cohortQuery, param);
239
+ if (!param.proteomeDetails?.assayName || !param.proteomeDetails?.cohortName)
240
+ throw "queries.proteome.get param.proteomeDetails.{assayName,cohortName} missing";
241
+ if (!param.proteomeDetails?.cohortControlFilter || !param.proteomeDetails?.cohortCaseFilter || !param.proteomeDetails?.assayColumnIdx || !param.proteomeDetails?.assayColumnValue)
242
+ throw "queries.proteome.get param.proteomeDetails.{cohortControlFilter, cohortCaseFilter, assayColumnIdx, assayColumnValue} missing";
243
+ return await getProteomeValuesFromCohort(ds, param);
254
244
  };
255
245
  }
256
- async function validateCohortFile(ds, assayName, cohortName, cohort) {
257
- if (!cohort.file.startsWith(serverconfig.tpmasterdir)) cohort.file = path.join(serverconfig.tpmasterdir, cohort.file);
258
- await utils.validate_txtfile(cohort.file);
259
- const headerLine = await utils.get_header_txt(cohort.file);
260
- const l = headerLine.split(" ");
261
- cohort.samples = [];
262
- for (let i = 9; i < l.length; i++) {
263
- const sampleName = l[i];
264
- const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
265
- if (sampleId == void 0) {
266
- throw `queries.proteome.assays.${assayName}.cohorts.${cohortName}: unknown sample from header: ${sampleName}`;
267
- }
268
- cohort.samples.push(sampleId);
269
- }
246
+ const columnIdxToName = {
247
+ 0: "organism",
248
+ 1: "disease",
249
+ 2: "tissue",
250
+ 3: "brain_region",
251
+ 4: "tech1",
252
+ 5: "tech2",
253
+ 6: "cohort"
254
+ };
255
+ function resolveColumnName(idx) {
256
+ const name = columnIdxToName[idx];
257
+ if (!name) throw `Invalid columnIdx: ${idx}, must be one of ${Object.keys(columnIdxToName).join(",")}`;
258
+ return name;
270
259
  }
271
- async function findProteinsInCohort(cohort, proteins) {
272
- if (!cohort._proteins) {
273
- const list = [];
274
- await utils.get_lines_txtfile({
275
- args: [cohort.file],
276
- dir: void 0,
277
- callback: (line) => {
278
- const cols = line.split(" ");
279
- if (cols[0]?.startsWith("#Unique identifier")) return;
280
- const identifier = cols[0].trim();
281
- const proteinName = cols[4].trim();
282
- list.push(`${proteinName}: ${identifier}`);
283
- }
284
- });
285
- cohort._proteins = list;
260
+ function buildFilterClause(filters) {
261
+ const conditions = [];
262
+ const params = [];
263
+ for (const f of filters) {
264
+ const colName = resolveColumnName(f.columnIdx);
265
+ conditions.push(`${colName} = ?`);
266
+ params.push(f.columnValue);
286
267
  }
268
+ return { conditions, params };
269
+ }
270
+ function findProteinsInCohort(db, proteins) {
287
271
  const matches = [];
288
272
  for (const p of proteins) {
289
273
  if (!p) continue;
290
- const lowerP = p.toLowerCase();
291
- for (const entry of cohort._proteins) {
292
- const proteinName = entry.split(":")[0];
293
- if (proteinName.toLowerCase().includes(lowerP)) {
294
- matches.push(entry);
274
+ const rows = db.prepare("SELECT DISTINCT gene, identifier FROM proteome_abundance WHERE gene LIKE ? COLLATE NOCASE").all(`%${p}%`);
275
+ for (const row of rows) {
276
+ if (row.gene.toLowerCase().includes(p.toLowerCase())) {
277
+ matches.push(`${row.gene}: ${row.identifier}`);
295
278
  }
296
279
  }
297
280
  }
298
281
  return matches;
299
282
  }
300
- async function findProteinsAcrossNonPTMCohorts(q, proteins) {
301
- const unique = /* @__PURE__ */ new Set();
302
- for (const assayName in q.assays || {}) {
303
- const assay = q.assays[assayName];
304
- if (assay.PTMType) continue;
305
- const cohorts = assay?.cohorts || {};
306
- for (const cohortName in cohorts) {
307
- const cohort = cohorts[cohortName];
308
- const matches = await findProteinsInCohort(cohort, proteins);
309
- for (const m of matches) unique.add(m);
310
- }
311
- }
312
- return [...unique];
283
+ function queryDbRows(db, matchColumn, matchValue, filters) {
284
+ console.log(`Querying DB for ${matchColumn}=${matchValue} with filters:`, filters);
285
+ const { conditions, params } = buildFilterClause(filters);
286
+ const allConditions = [`${matchColumn} = ? COLLATE NOCASE`, ...conditions];
287
+ const sql = `SELECT identifier, protein_accession, modsite, gene, sample, value
288
+ FROM proteome_abundance
289
+ WHERE ${allConditions.join(" AND ")}`;
290
+ console.log("Executing SQL:", sql);
291
+ return db.prepare(sql).all(matchValue, ...params);
313
292
  }
314
- async function getProteomeValuesFromCohort(ds, cohort, param) {
315
- const limitSamples = await mayLimitSamples(param, cohort.samples, ds);
316
- if (limitSamples?.size == 0) {
317
- return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
318
- }
319
- const bySampleId = {};
320
- const samples = cohort.samples || [];
321
- if (limitSamples) {
322
- for (const sid of limitSamples) bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
323
- } else {
324
- for (const sid of samples) bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
325
- }
293
+ async function getProteomeValuesFromCohort(ds, param) {
294
+ const db = ds.queries.proteome.db;
295
+ const { assayName, cohortName, PTMType, cohortControlFilter, cohortCaseFilter, assayColumnIdx, assayColumnValue } = param.proteomeDetails;
296
+ const assayFilter = [{ columnIdx: assayColumnIdx, columnValue: assayColumnValue }];
326
297
  const term2sample2value = /* @__PURE__ */ new Map();
327
- const { PTMType, cohort: cohortName, assay: assayName } = param.proteomeDetails;
328
298
  const allEntries = [];
299
+ const controlSampleIds = /* @__PURE__ */ new Set();
329
300
  for (const tw of param.terms) {
330
301
  if (!tw) continue;
331
- const fullEntry = tw.term.name;
332
- const identifier = fullEntry.split(":")[1]?.trim();
333
- const geneName = fullEntry.split(":")[0]?.trim();
302
+ const fullGeneName = tw.term.name;
303
+ const identifier = fullGeneName.split(":")[1]?.trim();
304
+ const geneName = fullGeneName.split(":")[0]?.trim();
334
305
  if (param.for === "proteinView") {
335
306
  if (!geneName) throw "invalid term name for proteome query, gene name missing";
336
307
  } else {
337
308
  if (!identifier || !geneName)
338
309
  throw "invalid term name for proteome query, must be in format geneName: uniqueIdentifier";
339
310
  }
340
- const s2v = {};
341
- await utils.get_lines_txtfile({
342
- args: [cohort.file],
343
- dir: void 0,
344
- callback: (line) => {
345
- const l = line.split(" ");
346
- if (param.for === "proteinView") {
347
- if (l[4]?.trim().toLowerCase() !== geneName.toLowerCase()) return;
348
- } else if (l[0]?.trim().toLowerCase() !== identifier.toLowerCase()) {
349
- return;
350
- }
351
- if (param.for === "proteinView") {
352
- const uniqueIdentifier = l[0]?.trim();
353
- if (!uniqueIdentifier) throw "missing unique identifier for PTM row";
354
- const rowS2v = {};
355
- for (let i = 9; i < l.length; i++) {
356
- const sampleId = cohort.samples[i - 9];
357
- if (limitSamples && !limitSamples.has(sampleId)) continue;
358
- if (!l[i]) continue;
359
- const v = Number(l[i]);
360
- if (Number.isNaN(v)) throw "exp value not number";
361
- rowS2v[sampleId] = v;
362
- }
363
- allEntries.push({
364
- uniqueIdentifier,
311
+ const matchColumn = param.for === "proteinView" ? "gene" : "identifier";
312
+ const matchValue = param.for === "proteinView" ? geneName : identifier;
313
+ const caseRows = queryDbRows(db, matchColumn, matchValue, [...assayFilter, ...cohortCaseFilter]);
314
+ const controlRows = queryDbRows(db, matchColumn, matchValue, [...assayFilter, ...cohortControlFilter]);
315
+ for (const row of controlRows) {
316
+ const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
317
+ if (sid !== void 0) controlSampleIds.add(String(sid));
318
+ }
319
+ const allRows = [...caseRows, ...controlRows];
320
+ const allSampleIds = [];
321
+ for (const row of allRows) {
322
+ const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
323
+ if (sid !== void 0) allSampleIds.push(sid);
324
+ }
325
+ const uniqueSampleIds = [...new Set(allSampleIds)];
326
+ const limitSamples = await mayLimitSamples(param, uniqueSampleIds, ds);
327
+ if (limitSamples?.size == 0) {
328
+ return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
329
+ }
330
+ if (param.for === "proteinView") {
331
+ const entryMap = /* @__PURE__ */ new Map();
332
+ for (const row of allRows) {
333
+ const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
334
+ if (sid === void 0) continue;
335
+ if (limitSamples && !limitSamples.has(sid)) continue;
336
+ if (!entryMap.has(row.identifier)) {
337
+ entryMap.set(row.identifier, {
338
+ uniqueIdentifier: row.identifier,
365
339
  assayName,
366
340
  cohortName,
367
341
  PTMType,
368
- modSites: PTMType ? l[1]?.trim() : void 0,
369
- category: !PTMType ? l[1]?.trim() : void 0,
370
- proteinAccession: l[2]?.trim(),
371
- geneName: l[4]?.trim(),
372
- //psms: l[5] === undefined || l[5] === '' ? undefined : Number.isNaN(Number(l[5])) ? l[5].trim() : Number(l[5]),
373
- s2v: rowS2v
342
+ modSites: PTMType ? row.modsite || void 0 : void 0,
343
+ proteinAccession: row.protein_accession,
344
+ geneName: row.gene,
345
+ s2v: {}
374
346
  });
375
- } else {
376
- for (let i = 9; i < l.length; i++) {
377
- const sampleId = cohort.samples[i - 9];
378
- if (limitSamples && !limitSamples.has(sampleId)) continue;
379
- if (!l[i]) continue;
380
- const v = Number(l[i]);
381
- if (Number.isNaN(v)) throw "exp value not number";
382
- s2v[sampleId] = v;
383
- }
384
347
  }
348
+ entryMap.get(row.identifier).s2v[sid] = row.value;
385
349
  }
386
- });
387
- if (param.for !== "proteinView" && Object.keys(s2v).length) {
388
- term2sample2value.set(tw.$id, s2v);
350
+ for (const entry of entryMap.values()) allEntries.push(entry);
351
+ } else {
352
+ const s2v = {};
353
+ for (const row of allRows) {
354
+ const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
355
+ if (sid === void 0) continue;
356
+ if (limitSamples && !limitSamples.has(sid)) continue;
357
+ s2v[sid] = row.value;
358
+ }
359
+ if (Object.keys(s2v).length) {
360
+ term2sample2value.set(tw.$id, s2v);
361
+ }
362
+ }
363
+ }
364
+ const bySampleId = {};
365
+ if (param.for === "proteinView") {
366
+ const sampleIds = /* @__PURE__ */ new Set();
367
+ for (const entry of allEntries) {
368
+ for (const sid of Object.keys(entry.s2v)) sampleIds.add(Number(sid));
369
+ }
370
+ for (const sid of sampleIds) {
371
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
389
372
  }
373
+ return { allEntries, controlSampleIds, bySampleId };
390
374
  }
391
- if (term2sample2value.size == 0 && param.for != "proteinView") {
375
+ if (term2sample2value.size == 0) {
392
376
  throw `No data available for: ${param.terms?.map((t) => t.term.name).join(", ")}`;
393
377
  }
394
- if (param.for === "proteinView") return { allEntries, bySampleId };
395
- else return { term2sample2value, bySampleId };
378
+ for (const s2v of term2sample2value.values()) {
379
+ for (const sid of Object.keys(s2v)) {
380
+ bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(Number(sid)) };
381
+ }
382
+ }
383
+ return { term2sample2value, controlSampleIds, bySampleId };
396
384
  }
397
385
  export {
398
386
  api,