@sjcrh/proteinpaint-server 2.147.2-0 → 2.148.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,436 @@
1
+ import { termdbSampleScatterPayload } from "#types/checkers";
2
+ import { getData } from "../src/termdb.matrix.js";
3
+ import path from "path";
4
+ import serverconfig from "../src/serverconfig.js";
5
+ import { schemeCategory20, getColors } from "#shared/common.js";
6
+ import { mclass, dt2label, morigin } from "#shared/common.js";
7
+ import { authApi } from "../src/auth.js";
8
+ import { run_R } from "@sjcrh/proteinpaint-r";
9
+ import { read_file } from "../src/utils.js";
10
+ import { isNumericTerm } from "@sjcrh/proteinpaint-shared/terms.js";
11
+ import { getDescrStats } from "#routes/termdb.descrstats.ts";
12
/**
 * Route definition for the sample scatter endpoint. GET and POST are configured
 * identically: the imported payload descriptor spread together with the `init`
 * handler factory. Each method gets its own object, so the entries are not shared.
 */
const api = {
	endpoint: "termdb/sampleScatter",
	methods: Object.fromEntries(
		["get", "post"].map((method) => [method, { ...termdbSampleScatterPayload, init }])
	)
};

/** Color used for the "Ref" legend entry when the color term does not supply one. */
const refColor = "#F5F5DC";
26
/**
 * Creates the request handler for the termdb/sampleScatter route.
 *
 * The handler gathers the term wrappers present on the query (color, shape,
 * divide-by, dot scale and coordinate terms), fetches their values through
 * getData(), and then obtains sample coordinates either from the coordinate
 * terms or from a prebuilt plot listed in ds.cohort.scatterplots.plots.
 * It replies with `{ result, range }`, or with `{ error }` on any failure.
 *
 * All validation happens inside the try block so that a bad request is
 * answered with an error payload instead of rejecting the handler's promise
 * without ever sending a response.
 *
 * @param {object} arg
 * @param {object} arg.genomes - genome objects keyed by genome name; each is read for `.datasets`
 * @returns {function(object, object): Promise<void>} async (req, res) handler
 */
function init({ genomes }) {
	return async function (req, res) {
		const q = req.query;
		try {
			if (!q.genome || !q.dslabel) {
				throw new Error("Genome and dataset label are required for termdb/sampleScatter request.");
			}
			const g = genomes[q.genome];
			if (!g) throw new Error("invalid genome name");
			const ds = g.datasets?.[q.dslabel];
			if (!ds) throw new Error("invalid dataset name");

			let refSamples = [];
			let cohortSamples;

			// every term wrapper on the request is loaded in a single getData() call
			const terms = [];
			if (q.colorTW) terms.push(q.colorTW);
			if (q.shapeTW) terms.push(q.shapeTW);
			if (q.divideByTW) terms.push(q.divideByTW);
			if (q.scaleDotTW) terms.push(q.scaleDotTW);
			if (q.coordTWs) for (const tw of q.coordTWs) terms.push(tw);
			const data = await getData(
				{ filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__ },
				ds,
				true // FIXME 3rd arg hardcoded to true
			);
			if (data.error) throw data.error;

			let result;
			// coordTWs is optional; a missing array is treated like an empty one
			if (q.coordTWs?.length > 0) {
				const tmp = await getSampleCoordinatesByTerms(req, q, ds, data);
				cohortSamples = tmp[0];
			} else {
				if (!q.plotName) throw `Neither plot name or coordinates where provided`;
				if (!Array.isArray(ds.cohort?.scatterplots?.plots)) throw "not supported";
				const plot = ds.cohort.scatterplots.plots.find((p) => p.name == q.plotName);
				if (!plot) throw `plot not found with plotName ${q.plotName}`;
				const tmp = await getSamples(ds, plot);
				refSamples = tmp[0];
				cohortSamples = tmp[1];
				if (q.colorColumn) {
					// color comes from a column of the plot file: the legend is built
					// from the reference samples only and no term-based coloring runs
					const categories = Array.from(new Set(refSamples.map((s) => s.category)));
					const colorMap = {};
					const k2c = getColors(categories.length);
					for (const category of categories) {
						const color = q.colorColumn.colorMap?.[category] || k2c(category);
						colorMap[category] = {
							sampleCount: refSamples.filter((s) => s.category == category).length,
							color,
							key: category
						};
					}
					const shapeMap = { Ref: { shape: 0, sampleCount: refSamples.length, key: "Ref" } };
					result = {
						Default: {
							samples: refSamples,
							colorLegend: Object.entries(colorMap),
							shapeLegend: Object.entries(shapeMap)
						}
					};
				}
			}

			const samples = [...cohortSamples, ...refSamples];
			let range;
			if (samples.length > 0) {
				if (q.excludeOutliers) {
					const ystats = getDescrStats(
						samples.map((s) => s.y),
						q.excludeOutliers
					);
					const withinBounds = (sample) =>
						sample.y > ystats.outlierMin.value && sample.y < ystats.outlierMax.value;
					cohortSamples = cohortSamples.filter(withinBounds);
					refSamples = refSamples.filter(withinBounds);
				}
				// NOTE(review): the range is still computed from the unfiltered list, so it
				// includes any outliers removed just above - confirm this is intended.
				const s0 = samples[0];
				const [xMin, xMax, yMin, yMax] = samples.reduce(
					(s, d) => [
						d.x < s[0] ? d.x : s[0],
						d.x > s[1] ? d.x : s[1],
						d.y < s[2] ? d.y : s[2],
						d.y > s[3] ? d.y : s[3]
					],
					[s0.x, s0.x, s0.y, s0.y]
				);
				range = { xMin, xMax, yMin, yMax };
			}
			if (!result) result = await colorAndShapeSamples(refSamples, cohortSamples, data, q);
			res.send({ result, range });
		} catch (e) {
			if (e.stack) console.log(e.stack);
			res.send({ error: e.message || e });
		}
	};
}
119
/**
 * Returns independent copies of a prebuilt plot's samples, loading the plot
 * file first when it has not been parsed yet.
 *
 * Copies are made through a JSON round trip so that callers can annotate the
 * samples without touching the arrays cached on `plot`.
 *
 * @param {object} ds - dataset, passed through to loadFile()
 * @param {object} plot - plot entry; `referenceSamples` / `filterableSamples` are read from it
 * @returns {Promise<Array>} `[referenceSamples, filterableSamples]`
 */
async function getSamples(ds, plot) {
	if (!plot.filterableSamples) await loadFile(plot, ds);
	// JSON.parse already yields a fresh array, so no additional copy loop is needed
	const deepCopy = (samples) => JSON.parse(JSON.stringify(samples));
	return [deepCopy(plot.referenceSamples), deepCopy(plot.filterableSamples)];
}
130
/**
 * Annotates cohort samples with category, shape, scale and z values, groups
 * them by the divide-by term and builds the color and shape legends per group.
 *
 * Every group starts with a copy of the reference samples (category and shape
 * "Ref", z 0) followed by the cohort samples assigned to it.
 *
 * @param {Array<object>} refSamples - reference samples; copied into every group
 * @param {Array<object>} cohortSamples - samples to annotate; mutated in place
 * @param {object} data - getData() output; `samples` and `refs` are read
 * @param {object} q - request query; colorTW, shapeTW, divideByTW and scaleDotTW are read
 * @returns {Promise<object>} map of group name to
 *   `{ samples, colorMap, shapeMap, colorLegend, shapeLegend }`
 */
async function colorAndShapeSamples(refSamples, cohortSamples, data, q) {
	const results = {};
	let fCount = 0;
	const hasTerms = Object.keys(data.samples).length > 0;

	for (const sample of cohortSamples) {
		const dbSample = data.samples[sample.sampleId.toString()];
		if (!dbSample && hasTerms) {
			fCount++;
			continue;
		}
		if ((q.colorTW && !hasValue(dbSample, q.colorTW)) || (q.shapeTW && !hasValue(dbSample, q.shapeTW))) continue;

		let divideBy = "Default";
		if (q.divideByTW) {
			// a sample without a db record has no divide-by value and cannot be grouped
			if (!dbSample) continue;
			sample.z = 0;
			if (q.divideByTW.term.type == "geneVariant" && q.divideByTW.q.type == "values") {
				// prefer a non-WT/Blank mutation, then fall back to any visible one
				divideBy = getMutation(true, dbSample, q.divideByTW);
				if (divideBy == null) divideBy = getMutation(false, dbSample, q.divideByTW);
			} else {
				const key = dbSample[q.divideByTW.$id]?.key;
				if (key == null) continue;
				if (q.divideByTW.q.mode != "continuous") divideBy = q.divideByTW.term.values?.[key]?.label || key;
				else sample.z = key;
			}
		}
		if (divideBy == null) {
			console.log("divideBy is null/undefined for sample " + JSON.stringify(sample));
			continue;
		}

		if (!results[divideBy]) {
			const samples = refSamples.map((ref) => ({ ...ref, category: "Ref", shape: "Ref", z: 0 }));
			results[divideBy] = { samples, colorMap: {}, shapeMap: {} };
		}
		const group = results[divideBy];

		if (!q.divideByTW) sample.z = 0;
		if (!q.scaleDotTW) sample.scale = 1;
		else {
			const value = dbSample?.[q.scaleDotTW.$id]?.key;
			if (!value || !isComputable(q.scaleDotTW.term, value)) continue;
			sample.scale = value;
		}

		sample.cat_info = {};
		sample.hidden = {};
		if (!q.colorTW) {
			sample.category = "Default";
		} else if (q.colorTW.q?.mode === "continuous") {
			if (dbSample) sample.category = dbSample[q.colorTW.$id].value;
		} else {
			processSample(dbSample, sample, q.colorTW, group.colorMap, "category");
		}
		if (q.shapeTW) processSample(dbSample, sample, q.shapeTW, group.shapeMap, "shape");
		else sample.shape = "Ref";
		group.samples.push(sample);
	}
	if (fCount) console.log(fCount + " samples not in the database or filtered");

	const groups = Object.values(results);
	// one color generator sized for the group with the most categories
	let max = 0;
	for (const result of groups) max = Math.max(max, Object.keys(result.colorMap).length);
	const k2c = getColors(max);
	const scheme = schemeCategory20;

	for (const result of groups) {
		if (q.colorTW && q.colorTW.q.mode !== "continuous") {
			// Fallback palette for numeric bins without a color: walk the scheme
			// backwards from index 19 and wrap around, so more than 20 categories
			// never yield an undefined color.
			// (assumes schemeCategory20 holds 20 entries - TODO confirm)
			let schemeIdx = 19;
			for (const [category, value] of Object.entries(result.colorMap)) {
				delete value["sampleIds"];
				const tvalue = q.colorTW.term.values?.[value.key];
				if (tvalue && "color" in tvalue) {
					value.color = tvalue.color;
				} else if (isNumericTerm(q.colorTW.term)) {
					const bins = data.refs.byTermId[q.colorTW.$id]?.bins;
					const bin = bins?.find((b) => b.label == category);
					if (bin?.color) value.color = bin.color;
					else {
						value.color = scheme[schemeIdx];
						schemeIdx = schemeIdx > 0 ? schemeIdx - 1 : scheme.length - 1;
					}
				} else if (!(q.colorTW.term.type == "geneVariant" && q.colorTW.q.type == "values")) {
					// geneVariant categories keep the class color assigned earlier
					value.color = k2c(category);
				}
			}
		}

		let i = 0;
		const shapes = Object.entries(result.shapeMap).sort((a, b) => a[0].localeCompare(b[0]));
		for (const [, value] of shapes) {
			delete value["sampleIds"];
			if ("shape" in value) continue;
			const preset = q.shapeTW.term.values?.[value.key]?.shape;
			value.shape = preset != void 0 ? preset : i;
			i++;
		}

		result.colorLegend = q.colorTW
			? order(result.colorMap, q.colorTW, data.refs)
			: [
					[
						"Default",
						{
							// count only the cohort samples that ended up in this group
							sampleCount: result.samples.length - refSamples.length,
							color: "blue",
							key: "Default"
						}
					]
			  ];
		result.colorLegend.push([
			"Ref",
			{
				sampleCount: refSamples.length,
				// fall back to refColor when the term has no usable color for "Ref"
				color: q.colorTW?.term.values?.["Ref"]?.color || refColor,
				key: "Ref"
			}
		]);
		result.shapeLegend = shapes;
		result.shapeLegend.push(["Ref", { sampleCount: refSamples.length, shape: 0, key: "Ref" }]);
	}
	return results;
}
238
/**
 * Tells whether a db sample carries a key for the given term wrapper.
 * A key of `null` counts as present; only `undefined` counts as missing.
 *
 * @param {object|undefined} dbSample - per-sample values keyed by term wrapper `$id`
 * @param {object|undefined} tw - term wrapper; only `$id` is read
 * @returns {boolean}
 */
function hasValue(dbSample, tw) {
	if (!tw || tw.$id === undefined) return false;
	return dbSample?.[tw.$id]?.key !== undefined;
}
243
/**
 * Assigns a category (or shape) to a sample from its db value for `tw` and
 * counts it in `categoryMap`.
 *
 * geneVariant terms queried by values are delegated to
 * assignGeneVariantValue(). For every other term the sample's key is mapped to
 * the label from `tw.term.values` when one exists, otherwise the key itself is
 * used. Samples without a key are left untouched.
 *
 * @param {object|undefined} dbSample - per-sample values keyed by term wrapper `$id`
 * @param {object} sample - sample to annotate; `sample[category]` and `sample.hidden[category]` are set
 * @param {object} tw - term wrapper (`$id`, `term`, `q` are read)
 * @param {object} categoryMap - accumulator: display value -> `{ sampleCount, key }`
 * @param {string} category - property to fill in, e.g. "category" or "shape"
 */
function processSample(dbSample, sample, tw, categoryMap, category) {
	if (tw.term.type == "geneVariant" && tw.q["type"] == "values") {
		assignGeneVariantValue(dbSample, sample, tw, categoryMap, category);
		return;
	}
	const key = dbSample?.[tw.$id]?.key;
	if (key == null) return;

	const value = tw.term.values?.[key]?.label || key;
	const hiddenValues = tw.q.hiddenValues;
	sample.hidden[category] = hiddenValues ? value in hiddenValues : false;

	// a numeric key of 0 is a legitimate value; only other falsy values are skipped
	if (!value && value !== 0) return;

	sample[category] = value.toString();
	// own-property test so that names such as "constructor" are not mistaken
	// for entries inherited from Object.prototype
	if (Object.prototype.hasOwnProperty.call(categoryMap, value)) categoryMap[value].sampleCount++;
	else categoryMap[value] = { sampleCount: 1, key };
}
261
/**
 * Annotates a sample from its geneVariant values and counts every mutation
 * category in `categoryMap`.
 *
 * Each mutation is recorded in `sample.cat_info[category]`. The map tracks the
 * distinct samples seen per mutation category through a `sampleIds` set, so a
 * sample with several mutations of the same category is counted once.
 * `sample[category]` is the preferred visible mutation (see getMutation()),
 * falling back to the category of the first mutation.
 *
 * Does nothing when `tw` is not a geneVariant term. When the sample has no
 * mutation list, only an empty `cat_info` entry and a false hidden flag are set.
 *
 * @param {object|undefined} dbSample - per-sample values keyed by term wrapper `$id`
 * @param {object} sample - sample to annotate; needs `cat_info` and `hidden` objects
 * @param {object} tw - geneVariant term wrapper
 * @param {object} categoryMap - accumulator keyed by mutation category
 * @param {string} category - property to fill in, e.g. "category" or "shape"
 */
function assignGeneVariantValue(dbSample, sample, tw, categoryMap, category) {
	if (tw.term.type != "geneVariant") return;

	const mutations = dbSample?.[tw.$id]?.values;
	sample.cat_info[category] = [];
	if (!mutations?.length) {
		// nothing to classify: avoid iterating or indexing a missing list
		sample.hidden[category] = false;
		return;
	}

	for (const mutation of mutations) {
		const value = getCategory(mutation);
		sample.cat_info[category].push(mutation);
		const existing = categoryMap[value];
		if (existing == void 0) {
			categoryMap[value] = {
				color: mclass[mutation.class].color,
				sampleCount: 1,
				mutation,
				key: value,
				sampleIds: new Set([dbSample.sample])
			};
		} else {
			existing.sampleIds.add(dbSample.sample);
			existing.sampleCount = existing.sampleIds.size;
		}
	}

	sample[category] =
		getMutation(true, dbSample, tw) || getMutation(false, dbSample, tw) || getCategory(mutations[0]);
	sample.hidden[category] = tw.q.hiddenValues ? sample[category] in tw.q.hiddenValues : false;
}
286
/**
 * Picks the category of one visible mutation of a sample, visiting data types
 * in the key order of `dt2label`.
 *
 * For each data type the first mutation that is not listed in
 * `tw.q.hiddenValues` is considered. In strict mode a mutation of class "WT"
 * or "Blank" makes that data type be skipped.
 *
 * @param {boolean} strict - skip data types whose first visible mutation is WT/Blank
 * @param {object|undefined} dbSample - per-sample values keyed by term wrapper `$id`
 * @param {object} tw - geneVariant term wrapper (`$id` and `q.hiddenValues` are read)
 * @returns {string|undefined} mutation category, or undefined when none qualifies
 */
function getMutation(strict, dbSample, tw) {
	const mutations = dbSample?.[tw.$id]?.values;
	// a sample without a mutation list simply has no category
	if (!mutations?.length) return undefined;

	const hiddenValues = tw.q.hiddenValues;
	for (const dt of Object.keys(dt2label)) {
		// loose equality on purpose: object keys are strings, mutation.dt may be a number
		const mutation = mutations.find((m) => m.dt == dt && !(hiddenValues && getCategory(m) in hiddenValues));
		if (!mutation) continue;
		const notImportant = mutation.class == "WT" || mutation.class == "Blank";
		if (strict && notImportant) continue;
		return getCategory(mutation);
	}
	return undefined;
}
301
/**
 * Builds the display category of a mutation: "<class label>, <data type label>".
 * When the mutation's origin has a label, the data type label is prefixed with
 * the first character of that origin label.
 *
 * @param {object} mutation - `class`, `dt` and `origin` are read
 * @returns {string}
 */
function getCategory(mutation) {
	const { dt, origin: originKey } = mutation;
	const classLabel = mclass[mutation.class].label;
	const originLabel = morigin[originKey]?.label;
	const dtLabel = dt2label[dt];
	const suffix = originLabel ? `${originLabel[0]} ${dtLabel}` : dtLabel;
	return `${classLabel}, ${suffix}`;
}
308
/**
 * Turns a category map into a sorted array of `[name, info]` legend entries.
 *
 * Sorting precedence:
 *  1. if any of `tw.term.values` declares an `order`, entries are sorted by the
 *     order of the term value whose label or key matches the entry name;
 *     entries without an order go last;
 *  2. otherwise, if `refs` lists bins for the term, entries follow the bin
 *     order and names matching no bin go last;
 *  3. otherwise entries are sorted alphabetically with localeCompare.
 *
 * @param {object} map - plain object keyed by category name
 * @param {object|undefined} tw - term wrapper (`$id` and `term.values` are read)
 * @param {object|undefined} refs - getData() refs; `byTermId[tw.$id].bins` is read when present
 * @returns {Array} sorted entries; empty when `tw` is missing or `map` has no keys
 */
function order(map, tw, refs) {
	if (!tw) return [];
	const entries = Object.entries(map);
	if (entries.length == 0) return entries;

	const termValues = tw.term?.values;
	const hasOrder = termValues ? Object.values(termValues).some((v) => v.order != void 0) : false;
	const bins = refs?.byTermId?.[tw.$id]?.bins;

	// sort keys are resolved once per entry rather than on every comparison
	const sortBy = (keyOf) => {
		const keys = new Map(entries.map(([name]) => [name, keyOf(name)]));
		entries.sort((a, b) => {
			const ka = keys.get(a[0]);
			const kb = keys.get(b[0]);
			const aMissing = ka == void 0;
			const bMissing = kb == void 0;
			// entries without a sort key go last and keep their relative position
			if (aMissing && bMissing) return 0;
			if (aMissing) return 1;
			if (bMissing) return -1;
			if (ka < kb) return -1;
			if (ka > kb) return 1;
			return 0;
		});
	};

	if (hasOrder) {
		sortBy((name) => {
			// the last term value whose label or key equals the entry name wins
			let match;
			for (const [key, value] of Object.entries(termValues)) {
				if ((value.label && name == value.label) || key == name) match = value;
			}
			return match?.order;
		});
	} else if (bins) {
		sortBy((name) => {
			const idx = bins.findIndex((bin) => bin.label == name);
			return idx == -1 ? undefined : idx;
		});
	} else {
		entries.sort((a, b) => a[0].localeCompare(b[0]));
	}
	return entries;
}
343
/**
 * Derives scatter coordinates for every sample from the coordinate terms.
 *
 * x comes from the first coordinate term and y from the second one. When only
 * one coordinate term is given, y is 0. z comes from the divide-by term when
 * there is one, otherwise it is 0. Samples missing any requested value, or
 * whose value is flagged uncomputable by the term, are skipped.
 *
 * @param {object} req - request, used for the sample-id display permission check
 * @param {object} q - request query; `coordTWs` and `divideByTW` are read
 * @param {object} ds - dataset, used for the permission check
 * @param {object} data - getData() output; `samples` and `refs.bySampleId` are read
 * @returns {Promise<Array>} `[samples, data]`
 */
async function getSampleCoordinatesByTerms(req, q, ds, data) {
	if (!q.coordTWs || q.coordTWs.length == 0) return [[], data];

	const canDisplay = authApi.canDisplaySampleIds(req, ds);
	const [xTW, yTW] = q.coordTWs;
	const zTW = q.divideByTW;
	const samples = [];

	for (const [sampleId, values] of Object.entries(data.samples)) {
		const x = values[xTW.$id]?.value;
		// default to 0 only when no y term was requested; a sample that lacks a
		// value for a requested y term is skipped instead of being drawn at y = 0
		const y = yTW ? values[yTW.$id]?.value : 0;
		const z = zTW ? values[zTW.$id]?.value : 0;
		if (x == void 0 || y == void 0 || z == void 0) continue;
		if (!isComputable(xTW.term, x) || !isComputable(yTW?.term, y) || !isComputable(zTW?.term, z)) continue;

		const sample = { sampleId, x: Number(x), y: Number(y), z: Number(z) };
		if (canDisplay) {
			// the sample name is only attached when the permission check allows it
			sample.sample = data.refs.bySampleId[sampleId]?.label || sampleId;
		}
		samples.push(sample);
	}
	return [samples, data];
}
364
/**
 * Reports whether a value can be used for a term: true unless the term's
 * `values` entry for that value is flagged `uncomputable`. A missing term
 * accepts every value.
 *
 * @param {object|undefined} term
 * @param {*} value - looked up as a key of `term.values`
 * @returns {boolean}
 */
function isComputable(term, value) {
	const flagged = term ? term.values?.[value]?.uncomputable : false;
	return !flagged;
}
368
/**
 * Parses the coordinate file of a prebuilt scatter plot and caches the result
 * on the plot as `p.filterableSamples` and `p.referenceSamples`.
 *
 * The first line is a header and every following line describes one sample:
 * column 0 is the sample name, the x/y columns default to 1 and 2 and can be
 * overridden through `p.coordsColumns`. Samples whose name resolves to an id
 * via ds.cohort.termdb.q.sampleName2id() are filterable. All others are
 * reference samples and, when the header has a 4th column, receive the extra
 * columns as `info`. Lines with missing or non-numeric coordinates are counted
 * and skipped.
 *
 * The arrays are attached to `p` only after the whole file has been parsed, so
 * a failure midway never leaves a partial list cached on the plot.
 *
 * @param {object} p - plot entry (`file`, `name`, `coordsColumns`, `colorColumn` are read)
 * @param {object} ds - dataset providing `cohort.termdb.q.sampleName2id`
 * @returns {Promise<void>}
 */
async function loadFile(p, ds) {
	const text = await read_file(path.join(serverconfig.tpmasterdir, p.file));
	// accept both LF and CRLF line endings so the header carries no trailing "\r"
	const lines = text.trim().split(/\r?\n/);
	const xColumn = p.coordsColumns?.x || 1;
	const yColumn = p.coordsColumns?.y || 2;
	// NOTE(review): the field separator is reproduced as it appears in this view
	// (a single space); confirm it matches the data files (e.g. tab-delimited input).
	const headerFields = lines[0].split(" ");

	// Number("") is 0, so blank fields must be rejected explicitly
	const toCoordinate = (field) => (field == null || field.trim() === "" ? NaN : Number(field));

	const filterableSamples = [];
	const referenceSamples = [];
	let invalidXY = 0;
	for (let i = 1; i < lines.length; i++) {
		const l = lines[i].trim().split(" ");
		const x = toCoordinate(l[xColumn]);
		const y = toCoordinate(l[yColumn]);
		if (Number.isNaN(x) || Number.isNaN(y)) {
			invalidXY++;
			continue;
		}
		const sample = { sample: l[0], x, y };
		if (p.colorColumn) {
			sample["sampleId"] = l[0];
			sample.category = l[p.colorColumn.index];
			sample.shape = "Ref";
			sample.z = 0;
		}
		const id = ds.cohort.termdb.q.sampleName2id(l[0]);
		if (id == void 0) {
			if (headerFields[3]) {
				sample.info = {};
				for (let j = 3; j < headerFields.length; j++) {
					sample.info[headerFields[j]] = l[j];
				}
			}
			referenceSamples.push(sample);
		} else {
			sample["sampleId"] = id;
			filterableSamples.push(sample);
		}
	}

	p.filterableSamples = filterableSamples;
	p.referenceSamples = referenceSamples;
	console.log(
		p.name + " (prebuilt scatter):",
		p.filterableSamples.length,
		"lines,",
		p.referenceSamples.length,
		"reference cases,",
		invalidXY,
		"lines with invalid X/Y values"
	);
}
414
/**
 * Validates the optional `ds.cohort.scatterplots` configuration.
 * Resolves without doing anything when the dataset declares no scatterplots.
 *
 * @param {object} ds - dataset; `ds.cohort.scatterplots.plots` is checked
 * @returns {Promise<void>}
 * @throws {string} when `plots` is not an array, or a plot lacks `name` or `file`
 */
async function mayInitiateScatterplots(ds) {
	const scatterplots = ds.cohort.scatterplots;
	if (!scatterplots) return;

	const plots = scatterplots.plots;
	if (!Array.isArray(plots)) throw "cohort.scatterplots.plots is not array";

	for (const plot of plots) {
		if (!plot.name) throw ".name missing from one of scatterplots.plots[]";
		// a file is the only data source recognised here
		if (!plot.file) throw "unknown data source of one of scatterplots.plots[]";
	}
}
425
/**
 * Runs the "lowess.R" script on the submitted coordinates and sends the
 * resulting curve as an array of `[x, y]` pairs.
 *
 * @param {object} q - request query; `q.coords` is serialized as the script input
 * @param {object} res - response object; the curve is passed to `res.send()`
 * @returns {Promise<*>} whatever `res.send()` returns
 */
async function trigger_getLowessCurve(q, res) {
	const rOutput = await run_R("lowess.R", JSON.stringify(q.coords));
	const { x: xs, y: ys } = JSON.parse(rOutput);
	// pair each x with the y found at the same index
	const lowessCurve = Object.entries(xs).map(([idx, x]) => [x, ys[idx]]);
	return res.send(lowessCurve);
}
432
+ export {
433
+ api,
434
+ mayInitiateScatterplots,
435
+ trigger_getLowessCurve
436
+ };
@@ -184,27 +184,17 @@ function validateGeneExpressionNative(G) {
184
184
  G.get = async (q) => {
185
185
  const h5file = path.join(serverconfig.tpmasterdir, G.folder, (q.sample.eID || q.sample.sID) + ".h5");
186
186
  await file_is_readable(h5file);
187
- const read_hdf5_input_type = { gene: q.gene, hdf5_file: h5file };
188
- let out;
189
- try {
190
- const time1 = Date.now();
191
- const rust_output = await run_rust("readHDF5", JSON.stringify(read_hdf5_input_type));
192
- mayLog("Time taken to query HDF5 file:", Date.now() - time1, "ms");
193
- for (const line of rust_output.split("\n")) {
194
- if (line.startsWith("output_string:")) {
195
- out = JSON.parse(line.replace("output_string:", ""));
196
- } else {
197
- console.log(line);
198
- }
199
- }
200
- } catch (e) {
201
- if (typeof e == "string") {
202
- const geneNotFound = `Gene '${q.gene}' not found in the HDF5 file`;
203
- if (e.includes(geneNotFound)) throw "No expression data for this gene";
204
- }
205
- console.log(e);
206
- throw "error reading h5 file: " + e;
187
+ const query_gene = q.gene;
188
+ if (!query_gene) {
189
+ throw new Error("Gene parameter is undefined");
207
190
  }
191
+ const read_hdf5_input_type = { query: [query_gene], hdf5_file: h5file };
192
+ const time1 = Date.now();
193
+ const rust_output = await run_rust("readH5", JSON.stringify(read_hdf5_input_type));
194
+ mayLog("Time taken to query HDF5 file:", Date.now() - time1, "ms");
195
+ const result = JSON.parse(rust_output);
196
+ const out = result.query_output[query_gene]?.samples;
197
+ if (!out) throw `No expression data for ${query_gene}`;
208
198
  return out;
209
199
  };
210
200
  }