@sjcrh/proteinpaint-server 2.185.0 → 2.186.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.185.0",
3
+ "version": "2.186.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -64,9 +64,9 @@
64
64
  "@sjcrh/augen": "2.181.1",
65
65
  "@sjcrh/proteinpaint-python": "2.185.0",
66
66
  "@sjcrh/proteinpaint-r": "2.181.0",
67
- "@sjcrh/proteinpaint-rust": "2.185.0",
68
- "@sjcrh/proteinpaint-shared": "2.185.0",
69
- "@sjcrh/proteinpaint-types": "2.185.0",
67
+ "@sjcrh/proteinpaint-rust": "2.186.0",
68
+ "@sjcrh/proteinpaint-shared": "2.186.0",
69
+ "@sjcrh/proteinpaint-types": "2.186.0",
70
70
  "@types/express": "^5.0.0",
71
71
  "@types/express-session": "^1.18.1",
72
72
  "better-sqlite3": "^12.4.1",
@@ -261,18 +261,24 @@ async function validateNative(q, ds) {
261
261
  const vr = JSON.parse(tmp);
262
262
  if (vr.status !== "success") throw vr.message;
263
263
  if (!vr.samples?.length) throw "HDF5 file has no samples, please check file.";
264
+ const unknownSamples = /* @__PURE__ */ new Set();
264
265
  for (const sn of vr.samples) {
265
266
  const si = ds.cohort.termdb.q.sampleName2id(sn);
266
- if (si == void 0) {
267
+ if (si === void 0) {
267
268
  if (ds.cohort.db) {
268
269
  throw `unknown sample ${sn} from HDF5 ${q.file}`;
269
270
  } else {
271
+ unknownSamples.add(sn);
270
272
  continue;
271
273
  }
272
274
  }
273
275
  q.samples.push(si);
274
276
  }
275
277
  console.log(`${ds.label}: geneExpression HDF5 file validated. Format: ${vr.format}, Samples:`, q.samples.length);
278
+ if (unknownSamples.size) {
279
+ const arr = [...unknownSamples];
280
+ console.log(`unknown samples from geneExpression HDF5 file (${arr.length}): ${arr.join(", ")}`);
281
+ }
276
282
  } catch (error) {
277
283
  throw `${ds.label}: Failed to validate geneExpression HDF5 file: ${error}`;
278
284
  }
@@ -195,23 +195,34 @@ function addNonDictionaryQueries(c, ds, genome) {
195
195
  }
196
196
  if (q.proteome) {
197
197
  q2.proteome = {};
198
- if (q.proteome.overlayTerm) {
199
- q2.proteome.overlayTerm = JSON.parse(JSON.stringify(q.proteome.overlayTerm));
200
- }
201
- if (q.proteome.assays) {
202
- q2.proteome.assays = {};
203
- for (const assay in q.proteome.assays) {
204
- q2.proteome.assays[assay] = {};
205
- if (q.proteome.assays[assay].cohorts) {
206
- q2.proteome.assays[assay].cohorts = {};
207
- for (const cohort in q.proteome.assays[assay].cohorts) {
208
- q2.proteome.assays[assay].cohorts[cohort] = {};
209
- const src = q.proteome.assays[assay].cohorts[cohort];
210
- if ("controlFilter" in src) {
211
- q2.proteome.assays[assay].cohorts[cohort].controlFilter = JSON.parse(JSON.stringify(src.controlFilter));
212
- }
213
- if ("caseFilter" in src) {
214
- q2.proteome.assays[assay].cohorts[cohort].caseFilter = JSON.parse(JSON.stringify(src.caseFilter));
198
+ if (q.proteome.organisms) {
199
+ q2.proteome.organisms = {};
200
+ for (const organism in q.proteome.organisms) {
201
+ q2.proteome.organisms[organism] = {};
202
+ const orgSrc = q.proteome.organisms[organism];
203
+ if (orgSrc.overlayTerm) {
204
+ q2.proteome.organisms[organism].overlayTerm = JSON.parse(JSON.stringify(orgSrc.overlayTerm));
205
+ }
206
+ if (orgSrc.assays) {
207
+ q2.proteome.organisms[organism].assays = {};
208
+ for (const assay in orgSrc.assays) {
209
+ q2.proteome.organisms[organism].assays[assay] = {};
210
+ if (orgSrc.assays[assay].cohorts) {
211
+ q2.proteome.organisms[organism].assays[assay].cohorts = {};
212
+ for (const cohort in orgSrc.assays[assay].cohorts) {
213
+ q2.proteome.organisms[organism].assays[assay].cohorts[cohort] = {};
214
+ const src = orgSrc.assays[assay].cohorts[cohort];
215
+ if ("controlFilter" in src) {
216
+ q2.proteome.organisms[organism].assays[assay].cohorts[cohort].controlFilter = JSON.parse(
217
+ JSON.stringify(src.controlFilter)
218
+ );
219
+ }
220
+ if ("caseFilter" in src) {
221
+ q2.proteome.organisms[organism].assays[assay].cohorts[cohort].caseFilter = JSON.parse(
222
+ JSON.stringify(src.caseFilter)
223
+ );
224
+ }
225
+ }
215
226
  }
216
227
  }
217
228
  }
@@ -305,7 +316,7 @@ function getDsAllowedTermTypes(ds) {
305
316
  if (ds.queries?.geneExpression) typeSet.add(GENE_EXPRESSION);
306
317
  if (ds.queries?.isoformExpression) typeSet.add(ISOFORM_EXPRESSION);
307
318
  if (ds.queries?.metaboliteIntensity) typeSet.add(METABOLITE_INTENSITY);
308
- if (ds.queries?.proteome?.assays) typeSet.add(PROTEOME_ABUNDANCE);
319
+ if (ds.queries?.proteome) typeSet.add(PROTEOME_ABUNDANCE);
309
320
  if (ds.queries?.ssGSEA) typeSet.add(SSGSEA);
310
321
  if (ds.queries?.dnaMethylation) typeSet.add(DNA_METHYLATION);
311
322
  if (ds.queries?.singleCell) {
@@ -26,42 +26,46 @@ function init({ genomes }) {
26
26
  const term = q.term?.term || q.term;
27
27
  if (!term?.name) throw "term.name missing";
28
28
  const cohorts = [];
29
- for (const assayName in ds.queries.proteome.assays) {
30
- const assay = ds.queries.proteome.assays[assayName];
31
- for (const cohortName in assay.cohorts || {}) {
32
- const details = {
33
- dbfile: ds.queries.proteome.dbfile,
34
- assay: assayName,
35
- cohort: cohortName
36
- };
37
- const tw = {
38
- $id: "_",
39
- term: {
40
- name: term.name,
41
- type: "proteomeAbundance",
42
- proteomeDetails: details
29
+ for (const organismName in ds.queries.proteome.organisms) {
30
+ const organism = ds.queries.proteome.organisms[organismName];
31
+ for (const assayName in organism.assays) {
32
+ const assay = organism.assays[assayName];
33
+ for (const cohortName in assay.cohorts || {}) {
34
+ const details = {
35
+ dbfile: ds.queries.proteome.dbfile,
36
+ organism: organismName,
37
+ assay: assayName,
38
+ cohort: cohortName
39
+ };
40
+ const tw = {
41
+ $id: "_",
42
+ term: {
43
+ name: term.name,
44
+ type: "proteomeAbundance",
45
+ proteomeDetails: details
46
+ }
47
+ };
48
+ const cohortData = await ds.queries.proteome.get({
49
+ terms: [tw],
50
+ proteomeDetails: details,
51
+ filter: q.filter,
52
+ filter0: q.filter0,
53
+ for: "proteinView",
54
+ __abortSignal: q.__abortSignal
55
+ });
56
+ const controlSampleIds = cohortData.controlSampleIds || /* @__PURE__ */ new Set();
57
+ const prior = assay.cohorts[cohortName].prior;
58
+ for (const entry of cohortData.allEntries || []) {
59
+ const s2v = entry.s2v;
60
+ const stats = getCohortStats(s2v, controlSampleIds, prior);
61
+ delete entry.s2v;
62
+ entry.foldChange = stats.foldChange;
63
+ entry.pValue = stats.pValue;
64
+ entry.testedN = stats.testedN;
65
+ entry.controlN = stats.controlN;
66
+ if (assay.mclassOverride) entry.mclassOverride = assay.mclassOverride;
67
+ cohorts.push(entry);
43
68
  }
44
- };
45
- const cohortData = await ds.queries.proteome.get({
46
- terms: [tw],
47
- proteomeDetails: details,
48
- filter: q.filter,
49
- filter0: q.filter0,
50
- for: "proteinView",
51
- __abortSignal: q.__abortSignal
52
- });
53
- const controlSampleIds = cohortData.controlSampleIds || /* @__PURE__ */ new Set();
54
- const prior = assay.cohorts[cohortName].prior;
55
- for (const entry of cohortData.allEntries || []) {
56
- const s2v = entry.s2v;
57
- const stats = getCohortStats(s2v, controlSampleIds, prior);
58
- delete entry.s2v;
59
- entry.foldChange = stats.foldChange;
60
- entry.pValue = stats.pValue;
61
- entry.testedN = stats.testedN;
62
- entry.controlN = stats.controlN;
63
- if (assay.mclassOverride) entry.mclassOverride = assay.mclassOverride;
64
- cohorts.push(entry);
65
69
  }
66
70
  }
67
71
  }
@@ -202,8 +206,8 @@ function lnGamma(z) {
202
206
  async function validate_query_proteome(ds) {
203
207
  const q = ds.queries.proteome;
204
208
  if (!q) return;
205
- if (!q.assays) {
206
- throw "queries.proteome.assays is missing";
209
+ if (!q.organisms) {
210
+ throw "queries.proteome.organisms is missing";
207
211
  }
208
212
  if (!q.dbfile) {
209
213
  throw "queries.proteome.dbfile is missing";
@@ -213,21 +217,31 @@ async function validate_query_proteome(ds) {
213
217
  } catch (e) {
214
218
  throw `Cannot connect to proteome db ${q.dbfile}: ${e.message || e}`;
215
219
  }
216
- for (const assayName in q.assays) {
217
- const assay = q.assays[assayName];
218
- if (assay.columnIdx == null) throw `queries.proteome.assays.${assayName}.columnIdx missing`;
219
- if (assay.columnValue == null) throw `queries.proteome.assays.${assayName}.columnValue missing`;
220
- if (assay.cohorts) {
221
- for (const cohortName in assay.cohorts) {
222
- const cohort = assay.cohorts[cohortName];
223
- if (!cohort.controlFilter)
224
- throw `Missing controlFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
225
- if (!cohort.caseFilter) throw `Missing caseFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
226
- if (!cohort.prior?.d0 || !cohort.prior?.s0sq)
227
- throw `Missing prior.d0 and prior.s0sq in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
220
+ for (const organismName in q.organisms) {
221
+ const organism = q.organisms[organismName];
222
+ if (organism.columnIdx == null) throw `queries.proteome.organisms.${organismName}.columnIdx missing`;
223
+ if (organism.columnValue == null) throw `queries.proteome.organisms.${organismName}.columnValue missing`;
224
+ if (!organism.assays || typeof organism.assays != "object")
225
+ throw `queries.proteome.organisms.${organismName}.assays missing or invalid`;
226
+ for (const assayName in organism.assays) {
227
+ const assay = organism.assays[assayName];
228
+ if (assay.columnIdx == null)
229
+ throw `queries.proteome.organisms.${organismName}.assays.${assayName}.columnIdx missing`;
230
+ if (assay.columnValue == null)
231
+ throw `queries.proteome.organisms.${organismName}.assays.${assayName}.columnValue missing`;
232
+ if (assay.cohorts) {
233
+ for (const cohortName in assay.cohorts) {
234
+ const cohort = assay.cohorts[cohortName];
235
+ if (!cohort.controlFilter)
236
+ throw `Missing controlFilter in queries.proteome.organisms.${organismName}.assays.${assayName}.cohorts.${cohortName}`;
237
+ if (!cohort.caseFilter)
238
+ throw `Missing caseFilter in queries.proteome.organisms.${organismName}.assays.${assayName}.cohorts.${cohortName}`;
239
+ if (!cohort.prior?.d0 || !cohort.prior?.s0sq)
240
+ throw `Missing prior.d0 and prior.s0sq in queries.proteome.organisms.${organismName}.assays.${assayName}.cohorts.${cohortName}`;
241
+ }
242
+ } else {
243
+ throw `Invalid assay structure for "${assayName}". Must have .cohorts`;
228
244
  }
229
- } else {
230
- throw `Invalid assay structure for "${assayName}". Must have .cohorts`;
231
245
  }
232
246
  }
233
247
  q.find = async (arg) => {
@@ -235,22 +249,27 @@ async function validate_query_proteome(ds) {
235
249
  if (!Array.isArray(proteins) || proteins.length == 0) throw "queries.proteome.find arg.proteins[] missing";
236
250
  const matches = /* @__PURE__ */ new Set();
237
251
  const details = arg?.proteomeDetails || {};
252
+ const organism = details.organism;
238
253
  const assay = details.assay;
239
254
  const cohort = details.cohort;
240
255
  const MAX_FIND_RESULTS = 500;
241
256
  const filters = [];
242
257
  if (Object.keys(details).length) {
243
- if (!assay || !cohort) throw "queries.proteome.find arg.proteomeDetails.{assay,cohort} missing";
244
- const assayConfig = q.assays?.[assay];
258
+ if (!organism || !assay || !cohort)
259
+ throw "queries.proteome.find arg.proteomeDetails.{organism,assay,cohort} missing";
260
+ const organismConfig = q.organisms?.[organism];
261
+ if (!organismConfig) throw `queries.proteome.find invalid organism: ${organism}`;
262
+ const assayConfig = organismConfig.assays?.[assay];
245
263
  if (!assayConfig) throw `queries.proteome.find invalid assay: ${assay}`;
246
264
  const cohortConfig = assayConfig?.cohorts?.[cohort];
247
265
  if (!cohortConfig) throw `queries.proteome.find invalid cohort: ${cohort}`;
266
+ const organismFilter = [{ columnIdx: organismConfig.columnIdx, columnValue: organismConfig.columnValue }];
248
267
  const assayFilter = [{ columnIdx: assayConfig.columnIdx, columnValue: assayConfig.columnValue }];
249
268
  const cohortFilter = (Array.isArray(cohortConfig.caseFilter) ? cohortConfig.caseFilter : []).filter(
250
269
  (filter) => !!filter
251
270
  );
252
271
  if (!cohortFilter.length) throw `queries.proteome.find invalid cohort caseFilter: ${cohort}`;
253
- filters.push(...assayFilter, ...cohortFilter);
272
+ filters.push(...organismFilter, ...assayFilter, ...cohortFilter);
254
273
  }
255
274
  for (const p of proteins) {
256
275
  if (!p) continue;
@@ -280,8 +299,8 @@ async function validate_query_proteome(ds) {
280
299
  };
281
300
  q.get = async (param) => {
282
301
  if (!param?.terms?.length) throw "queries.proteome.get param.terms[] missing";
283
- if (!param.proteomeDetails?.assay || !param.proteomeDetails?.cohort)
284
- throw "queries.proteome.get param.proteomeDetails.{assay,cohort} missing";
302
+ if (!param.proteomeDetails?.assay || !param.proteomeDetails?.cohort || !param.proteomeDetails?.organism)
303
+ throw "queries.proteome.get param.proteomeDetails.{assay,cohort,organism} missing";
285
304
  return await getProteomeValuesFromCohort(ds, param, q);
286
305
  };
287
306
  }
@@ -312,23 +331,28 @@ function buildFilterClause(filters) {
312
331
  function queryDbRows(db, matchColumn, matchValue, filters) {
313
332
  const { conditions, params } = buildFilterClause(filters);
314
333
  const allConditions = [`${matchColumn} = ? COLLATE NOCASE`, ...conditions];
315
- const sql = `SELECT identifier, protein_accession, isoform, modsite, gene, sample, value
334
+ const sql = `SELECT organism, disease, identifier, protein_accession, isoform, modsite, gene, sample, value
316
335
  FROM proteome_abundance
317
336
  WHERE ${allConditions.join(" AND ")}`;
318
337
  return db.prepare(sql).all(matchValue, ...params);
319
338
  }
320
339
  async function getProteomeValuesFromCohort(ds, param, q) {
321
340
  const db = ds.queries.proteome.db;
322
- const { assay, cohort } = param.proteomeDetails;
323
- const assayConfig = q.assays?.[assay];
341
+ const { assay, cohort, organism } = param.proteomeDetails;
342
+ const organismConfig = q.organisms?.[organism];
343
+ if (!organismConfig) throw `queries.proteome invalid organism: ${organism}`;
344
+ const organismColumnIdx = organismConfig.columnIdx;
345
+ const organismColumnValue = organismConfig.columnValue;
346
+ const assayConfig = organismConfig.assays?.[assay];
324
347
  if (!assayConfig) throw `queries.proteome.get invalid assay: ${assay}`;
325
- const PTMType = q.assays[assay].PTMType;
348
+ const PTMType = assayConfig.PTMType;
326
349
  const assayColumnIdx = assayConfig.columnIdx;
327
350
  const assayColumnValue = assayConfig.columnValue;
328
351
  const cohortConfig = assayConfig?.cohorts?.[cohort];
329
352
  if (!cohortConfig) throw `queries.proteome.get invalid cohort: ${cohort}`;
330
353
  const cohortControlFilter = cohortConfig.controlFilter;
331
354
  const cohortCaseFilter = cohortConfig.caseFilter;
355
+ const organismFilter = [{ columnIdx: organismColumnIdx, columnValue: organismColumnValue }];
332
356
  const assayFilter = [{ columnIdx: assayColumnIdx, columnValue: assayColumnValue }];
333
357
  const term2sample2value = /* @__PURE__ */ new Map();
334
358
  const allEntries = [];
@@ -346,8 +370,12 @@ async function getProteomeValuesFromCohort(ds, param, q) {
346
370
  }
347
371
  const matchColumn = param.for === "proteinView" ? "gene" : "identifier";
348
372
  const matchValue = param.for === "proteinView" ? geneName : identifier;
349
- const caseRows = queryDbRows(db, matchColumn, matchValue, [...assayFilter, ...cohortCaseFilter]);
350
- const controlRows = queryDbRows(db, matchColumn, matchValue, [...assayFilter, ...cohortControlFilter]);
373
+ const caseRows = queryDbRows(db, matchColumn, matchValue, [...organismFilter, ...assayFilter, ...cohortCaseFilter]);
374
+ const controlRows = queryDbRows(db, matchColumn, matchValue, [
375
+ ...organismFilter,
376
+ ...assayFilter,
377
+ ...cohortControlFilter
378
+ ]);
351
379
  for (const row of controlRows) {
352
380
  const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
353
381
  if (sid !== void 0) controlSampleIds.add(String(sid));
@@ -359,8 +387,8 @@ async function getProteomeValuesFromCohort(ds, param, q) {
359
387
  if (sid !== void 0) allSampleIds.push(sid);
360
388
  }
361
389
  const uniqueSampleIds = [...new Set(allSampleIds)];
362
- const limitSamples = await mayLimitSamples(param, uniqueSampleIds, ds);
363
- if (limitSamples?.size == 0) {
390
+ const allowedSampleIds = await mayLimitSamples(param, uniqueSampleIds, ds);
391
+ if (allowedSampleIds?.size == 0) {
364
392
  return { term2sample2value: /* @__PURE__ */ new Map(), byTermId: {}, bySampleId: {} };
365
393
  }
366
394
  if (param.for === "proteinView") {
@@ -368,9 +396,11 @@ async function getProteomeValuesFromCohort(ds, param, q) {
368
396
  for (const row of allRows) {
369
397
  const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
370
398
  if (sid === void 0) continue;
371
- if (limitSamples && !limitSamples.has(sid)) continue;
399
+ if (allowedSampleIds && !allowedSampleIds.has(sid)) continue;
372
400
  if (!entryMap.has(row.identifier)) {
373
401
  entryMap.set(row.identifier, {
402
+ organism: row.organism,
403
+ disease: row.disease,
374
404
  uniqueIdentifier: row.identifier,
375
405
  assayName: assay,
376
406
  cohortName: cohort,
@@ -391,7 +421,7 @@ async function getProteomeValuesFromCohort(ds, param, q) {
391
421
  for (const row of allRows) {
392
422
  const sid = ds.cohort.termdb.q.sampleName2id(row.sample);
393
423
  if (sid === void 0) continue;
394
- if (limitSamples && !limitSamples.has(sid)) continue;
424
+ if (allowedSampleIds && !allowedSampleIds.has(sid)) continue;
395
425
  s2v[sid] = row.value;
396
426
  }
397
427
  if (Object.keys(s2v).length) {
@@ -135,23 +135,24 @@ function validateDataNative(D, ds) {
135
135
  }
136
136
  const file2Lines = {};
137
137
  D.get = async (q) => {
138
+ const sampleId = q.sample?.eID || q.sample?.sID;
138
139
  if (q.checkPlotAvailability) {
139
140
  const plots2 = [];
140
141
  for (const plot of D.plots) {
141
142
  if (!q.plots.includes(plot.name)) continue;
142
- const tsvfile = path.join(
143
- serverconfig.tpmasterdir,
144
- plot.folder,
145
- (q.sample?.eID || q.sample?.sID) + (plot.fileSuffix || "")
146
- );
143
+ const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sampleId + (plot.fileSuffix || ""));
147
144
  try {
148
145
  await file_is_readable(tsvfile);
149
- plots2.push({
150
- name: plot.name,
151
- expCells: [],
152
- // FIXME avoid breaking client but shouldn't be needed
153
- noExpCells: []
154
- });
146
+ plots2.push({ name: plot.name });
147
+ } catch (_) {
148
+ }
149
+ }
150
+ const imgs = ds.queries.singleCell?.images;
151
+ if (imgs) {
152
+ const imgFile = path.join(serverconfig.tpmasterdir, imgs.folder, sampleId, imgs.fileName);
153
+ try {
154
+ await file_is_readable(imgFile);
155
+ plots2.push({ name: imgs?.label || "Image" });
155
156
  } catch (_) {
156
157
  }
157
158
  }
@@ -166,11 +167,7 @@ function validateDataNative(D, ds) {
166
167
  }
167
168
  for (const plot of D.plots) {
168
169
  if (!q.plots.includes(plot.name)) continue;
169
- const tsvfile = path.join(
170
- serverconfig.tpmasterdir,
171
- plot.folder,
172
- (q.sample?.eID || q.sample?.sID) + (plot.fileSuffix || "")
173
- );
170
+ const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sampleId + (plot.fileSuffix || ""));
174
171
  if (!file2Lines[tsvfile]) {
175
172
  await file_is_readable(tsvfile);
176
173
  const text = await read_file(tsvfile);
@@ -71,7 +71,6 @@ function expandNumericTermCollection(q, data) {
71
71
  throw new Error("overlayTw is not supported with numeric termCollection; member terms are used as the overlay");
72
72
  if (q.divideTw) throw new Error("divideTw is not supported with numeric termCollection");
73
73
  const termlst = term.termlst || [];
74
- mayLog("termlst", termlst);
75
74
  mayLog(
76
75
  `Expanding numeric termCollection with ${termlst.length} member terms and ${Object.keys(data.samples).length} samples`
77
76
  );