@sjcrh/proteinpaint-server 2.182.0 → 2.182.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/genome/hg38.base.js +492 -0
- package/genome/hg38.js +2 -487
- package/genome/hg38.mmrf.js +13 -0
- package/package.json +4 -4
- package/routes/profile.polar2.js +1 -1
- package/routes/termdb.proteome.js +400 -0
- package/routes/termdb.singlecellSamples.js +8 -4
- package/routes/termdb.violinBox.js +137 -88
- package/src/app.js +4702 -4288
- package/src/mds3.gdc.filter.js +1 -0
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import { violinBoxPayload } from "#types/checkers";
|
|
2
2
|
import { scaleLinear, scaleLog } from "d3";
|
|
3
3
|
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
4
|
+
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
4
5
|
import { getData } from "../src/termdb.matrix.js";
|
|
5
6
|
import { createCanvas } from "canvas";
|
|
6
7
|
import { getOrderedLabels } from "../src/termdb.barchart.js";
|
|
7
8
|
import { getDescrStats, getStdDev, getMean } from "./termdb.descrstats.ts";
|
|
8
9
|
import { isNumericTerm } from "#shared/terms.js";
|
|
9
10
|
import { boxplot_getvalue } from "../src/utils.js";
|
|
10
|
-
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
11
11
|
import { roundValueAuto } from "#shared/roundValue.js";
|
|
12
|
+
import { mayLog } from "#src/helpers.ts";
|
|
12
13
|
const api = {
|
|
13
14
|
endpoint: "termdb/violinBox",
|
|
14
15
|
methods: {
|
|
@@ -33,7 +34,8 @@ function init({ genomes }) {
|
|
|
33
34
|
if (!ds) throw new Error("invalid ds");
|
|
34
35
|
if (typeof q.tw?.term != "object" || typeof q.tw?.q != "object") throw new Error("q.tw not of {term,q}");
|
|
35
36
|
const term = q.tw.term;
|
|
36
|
-
if (!isNumericTerm(term) && term.type !== "survival"
|
|
37
|
+
if (!isNumericTerm(term) && term.type !== "survival" && term.type !== "termCollection")
|
|
38
|
+
throw new Error("term type is not numeric, survival, or termCollection");
|
|
37
39
|
const arg = {
|
|
38
40
|
terms: [q.tw],
|
|
39
41
|
filter: q.filter,
|
|
@@ -47,6 +49,7 @@ function init({ genomes }) {
|
|
|
47
49
|
const data = await getData(arg, ds);
|
|
48
50
|
if (!data) throw new Error("getData() returns nothing");
|
|
49
51
|
if (data.error) throw new Error(data.error);
|
|
52
|
+
if (q.tw.term.type === "termCollection") expandNumericTermCollection(q, data);
|
|
50
53
|
if (q.plotType === "violin") {
|
|
51
54
|
result = await getViolin(q, data, ds);
|
|
52
55
|
} else if (q.plotType === "box") {
|
|
@@ -61,17 +64,61 @@ function init({ genomes }) {
|
|
|
61
64
|
res.send(result);
|
|
62
65
|
};
|
|
63
66
|
}
|
|
67
|
+
function expandNumericTermCollection(q, data) {
|
|
68
|
+
const term = q.tw.term;
|
|
69
|
+
if (term.memberType !== "numeric") throw new Error("only numeric termCollection is supported for violinBox");
|
|
70
|
+
if (q.overlayTw)
|
|
71
|
+
throw new Error("overlayTw is not supported with numeric termCollection; member terms are used as the overlay");
|
|
72
|
+
if (q.divideTw) throw new Error("divideTw is not supported with numeric termCollection");
|
|
73
|
+
const termlst = term.termlst || [];
|
|
74
|
+
mayLog("termlst", termlst);
|
|
75
|
+
mayLog(
|
|
76
|
+
`Expanding numeric termCollection with ${termlst.length} member terms and ${Object.keys(data.samples).length} samples`
|
|
77
|
+
);
|
|
78
|
+
const propsByTermId = term.propsByTermId || {};
|
|
79
|
+
const tcId = q.tw.$id;
|
|
80
|
+
const memberNameById = {};
|
|
81
|
+
const overlayValues = {};
|
|
82
|
+
for (const mt of termlst) {
|
|
83
|
+
const name = mt.name || mt.id;
|
|
84
|
+
memberNameById[mt.id] = name;
|
|
85
|
+
overlayValues[name] = { label: name, color: propsByTermId[mt.id]?.color };
|
|
86
|
+
}
|
|
87
|
+
const newSamples = {};
|
|
88
|
+
for (const [sampleId, sampleData] of Object.entries(data.samples)) {
|
|
89
|
+
const tcEntry = sampleData[tcId];
|
|
90
|
+
const memberValues = tcEntry?.value;
|
|
91
|
+
if (!memberValues || typeof memberValues !== "object") continue;
|
|
92
|
+
for (const [memberId, memberVal] of Object.entries(memberValues)) {
|
|
93
|
+
if (typeof memberVal !== "number" || !Number.isFinite(memberVal)) continue;
|
|
94
|
+
const memberName = memberNameById[memberId] || memberId;
|
|
95
|
+
newSamples[`${sampleId}__${memberId}`] = {
|
|
96
|
+
...sampleData,
|
|
97
|
+
[tcId]: { key: memberVal, value: memberVal },
|
|
98
|
+
["__tcOverlay"]: { key: memberName, value: memberName }
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
data.samples = newSamples;
|
|
103
|
+
q.overlayTw = {
|
|
104
|
+
$id: "__tcOverlay",
|
|
105
|
+
term: { type: "categorical", values: overlayValues, name: term.name },
|
|
106
|
+
q: {}
|
|
107
|
+
};
|
|
108
|
+
data.refs.byTermId["__tcOverlay"] = {
|
|
109
|
+
keyOrder: termlst.map((t) => t.name || t.id)
|
|
110
|
+
};
|
|
111
|
+
}
|
|
64
112
|
async function getViolin(q, data, ds) {
|
|
65
113
|
const samples = Object.values(data.samples);
|
|
66
|
-
|
|
67
|
-
if (q.isLogScale) values = values.filter((v) => v > 0);
|
|
114
|
+
const values = extractNumericValues(samples, q.tw, q.isLogScale);
|
|
68
115
|
const descrStats = getDescrStats(values);
|
|
69
|
-
const sampleType =
|
|
70
|
-
if (data.error) throw new Error(data.error);
|
|
116
|
+
const sampleType = computeSampleType(data);
|
|
71
117
|
if (q.overlayTw && data.refs.byTermId[q.overlayTw.$id]) {
|
|
118
|
+
;
|
|
72
119
|
data.refs.byTermId[q.overlayTw.$id].orderedLabels = getOrderedLabels(
|
|
73
|
-
q.overlayTw,
|
|
74
|
-
data.refs.byTermId[q.overlayTw.$id]?.bins,
|
|
120
|
+
q.overlayTw.term,
|
|
121
|
+
data.refs.byTermId[q.overlayTw.$id]?.bins || [],
|
|
75
122
|
void 0,
|
|
76
123
|
q.overlayTw.q
|
|
77
124
|
);
|
|
@@ -102,8 +149,7 @@ function divideValues(q, data, sampleType) {
|
|
|
102
149
|
q,
|
|
103
150
|
data,
|
|
104
151
|
sampleType,
|
|
105
|
-
useLog
|
|
106
|
-
// avoid tsc err
|
|
152
|
+
!!useLog,
|
|
107
153
|
overlayTerm,
|
|
108
154
|
divideTerm
|
|
109
155
|
);
|
|
@@ -134,23 +180,17 @@ function setViolinResponse(valuesObject, data, q) {
|
|
|
134
180
|
const plots = [];
|
|
135
181
|
for (const [plot, values] of sortPlot2Values(data, plot2values, overlayTerm)) {
|
|
136
182
|
plots.push({
|
|
137
|
-
label: overlayTerm?.term?.values?.[plot]?.label || plot,
|
|
138
|
-
// avoid strange tsc err
|
|
183
|
+
label: String(overlayTerm?.term?.values?.[plot]?.label || plot),
|
|
139
184
|
values,
|
|
140
|
-
seriesId: plot,
|
|
141
|
-
chartId: chart,
|
|
142
|
-
//quick fix to get list samples working
|
|
185
|
+
seriesId: String(plot),
|
|
186
|
+
chartId: String(chart),
|
|
143
187
|
plotValueCount: values?.length,
|
|
144
188
|
color: overlayTerm?.term?.values?.[plot]?.color || ""
|
|
145
189
|
});
|
|
146
190
|
}
|
|
147
|
-
charts[chart] = { chartId: chart, plots };
|
|
191
|
+
charts[String(chart)] = { chartId: String(chart), plots };
|
|
148
192
|
}
|
|
149
|
-
const bins =
|
|
150
|
-
term1: numericBins(q.tw, data)
|
|
151
|
-
};
|
|
152
|
-
if (overlayTerm) bins.term2 = numericBins(overlayTerm, data);
|
|
153
|
-
if (divideTw) bins.term0 = numericBins(divideTw, data);
|
|
193
|
+
const bins = buildBins(q.tw, data, overlayTerm, divideTw);
|
|
154
194
|
const result = {
|
|
155
195
|
min: valuesObject.min,
|
|
156
196
|
max: valuesObject.max,
|
|
@@ -218,30 +258,11 @@ async function createCanvasImg(q, result, ds) {
|
|
|
218
258
|
}
|
|
219
259
|
}
|
|
220
260
|
async function getViolinWilcoxonData(result) {
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
for (let i = 0; i < numPlots; i++) {
|
|
227
|
-
const group1_id = chart.plots[i].label;
|
|
228
|
-
const group1_values = chart.plots[i].values;
|
|
229
|
-
for (let j = i + 1; j < numPlots; j++) {
|
|
230
|
-
const group2_id = chart.plots[j].label;
|
|
231
|
-
const group2_values = chart.plots[j].values;
|
|
232
|
-
wilcoxInput.push({ group1_id, group1_values, group2_id, group2_values });
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
const wilcoxOutput = JSON.parse(await run_rust("wilcoxon", JSON.stringify(wilcoxInput)));
|
|
236
|
-
chart.pvalues = [];
|
|
237
|
-
for (const test of wilcoxOutput) {
|
|
238
|
-
if (test.pvalue == null || test.pvalue == "null") {
|
|
239
|
-
chart.pvalues.push([{ value: test.group1_id }, { value: test.group2_id }, { html: "NA" }]);
|
|
240
|
-
} else {
|
|
241
|
-
chart.pvalues.push([{ value: test.group1_id }, { value: test.group2_id }, { html: test.pvalue.toPrecision(4) }]);
|
|
242
|
-
}
|
|
243
|
-
}
|
|
244
|
-
}
|
|
261
|
+
await runWilcoxonTests(
|
|
262
|
+
result.charts,
|
|
263
|
+
{ getGroupId: (plot) => plot.label, getGroupValues: (plot) => plot.values },
|
|
264
|
+
"pvalues"
|
|
265
|
+
);
|
|
245
266
|
}
|
|
246
267
|
async function getDensity(values) {
|
|
247
268
|
const result = await getDensities({ plot: values });
|
|
@@ -297,17 +318,16 @@ async function getBoxPlot(q, data) {
|
|
|
297
318
|
}
|
|
298
319
|
async function processBoxPlotData(data, q) {
|
|
299
320
|
const samples = Object.values(data.samples);
|
|
300
|
-
const values = samples
|
|
321
|
+
const values = extractNumericValues(samples, q.tw);
|
|
301
322
|
const descrStats = getDescrStats(values, q.removeOutliers);
|
|
302
|
-
const sampleType =
|
|
323
|
+
const sampleType = computeSampleType(data);
|
|
303
324
|
const overlayTw = q.overlayTw;
|
|
304
325
|
const divideTw = q.divideTw;
|
|
305
326
|
const { absMin, absMax, chart2plot2values, uncomputableValues } = parseValues(
|
|
306
327
|
q,
|
|
307
328
|
data,
|
|
308
329
|
sampleType,
|
|
309
|
-
q.isLogScale
|
|
310
|
-
// avoid tsc err
|
|
330
|
+
!!q.isLogScale,
|
|
311
331
|
overlayTw,
|
|
312
332
|
divideTw
|
|
313
333
|
);
|
|
@@ -338,16 +358,12 @@ async function processBoxPlotData(data, q) {
|
|
|
338
358
|
plots.sort((a, b) => a.boxplot.p50 - b.boxplot.p50);
|
|
339
359
|
}
|
|
340
360
|
const sampleCount = plots.reduce((total, p) => {
|
|
341
|
-
if (p.
|
|
361
|
+
if (p.isHidden) return total;
|
|
342
362
|
return total + p.descrStats.total.value;
|
|
343
363
|
}, 0);
|
|
344
|
-
charts[chart] = { chartId: chart, plots, sampleCount };
|
|
364
|
+
charts[String(chart)] = { chartId: String(chart), plots, sampleCount };
|
|
345
365
|
}
|
|
346
|
-
const bins =
|
|
347
|
-
term1: numericBins(q.tw, data)
|
|
348
|
-
};
|
|
349
|
-
if (overlayTw) bins.term2 = numericBins(overlayTw, data);
|
|
350
|
-
if (divideTw) bins.term0 = numericBins(divideTw, data);
|
|
366
|
+
const bins = buildBins(q.tw, data, overlayTw, divideTw);
|
|
351
367
|
if (q.showAssocTests && overlayTw) await getBoxPlotWilcoxonData(charts);
|
|
352
368
|
Object.keys(charts).forEach((c) => charts[c].plots.forEach((p) => delete p.tempValues));
|
|
353
369
|
return { absMin, absMax, bins, charts, uncomputableValues, descrStats, outlierMin, outlierMax };
|
|
@@ -367,14 +383,13 @@ function setPlotData(plots, values, key, sampleType, descrStats, q, outlierMin,
|
|
|
367
383
|
const plot = {
|
|
368
384
|
boxplot,
|
|
369
385
|
descrStats: setIndividualBoxPlotStats(boxplot, sortedValues),
|
|
370
|
-
//
|
|
371
|
-
//to delete later
|
|
386
|
+
// See comment in processBoxPlotData about tempValues
|
|
372
387
|
tempValues: sortedValues
|
|
373
388
|
};
|
|
374
389
|
if (overlayTw) {
|
|
375
390
|
const _key = overlayTw?.term?.values?.[key]?.label || key;
|
|
376
|
-
plot.color = overlayTw?.term?.values?.[key]?.color ||
|
|
377
|
-
plot.key = _key;
|
|
391
|
+
plot.color = overlayTw?.term?.values?.[key]?.color || void 0;
|
|
392
|
+
plot.key = String(_key);
|
|
378
393
|
plot.seriesId = key;
|
|
379
394
|
plot.boxplot.label = `${_key}, n=${values.length}`;
|
|
380
395
|
} else {
|
|
@@ -419,26 +434,36 @@ function setUncomputableValues(values) {
|
|
|
419
434
|
} else return null;
|
|
420
435
|
}
|
|
421
436
|
async function getBoxPlotWilcoxonData(charts) {
|
|
437
|
+
await runWilcoxonTests(
|
|
438
|
+
charts,
|
|
439
|
+
{
|
|
440
|
+
getGroupId: (plot) => plot.boxplot.label.replace(/, n=\d+$/, ""),
|
|
441
|
+
getGroupValues: (plot) => plot.tempValues
|
|
442
|
+
},
|
|
443
|
+
"wilcoxon"
|
|
444
|
+
);
|
|
445
|
+
}
|
|
446
|
+
async function runWilcoxonTests(charts, accessors, resultKey) {
|
|
422
447
|
for (const chart of Object.values(charts)) {
|
|
423
448
|
const numPlots = chart.plots?.length;
|
|
424
|
-
if (numPlots < 2) continue;
|
|
425
|
-
const
|
|
449
|
+
if (!numPlots || numPlots < 2) continue;
|
|
450
|
+
const wilcoxInput = [];
|
|
426
451
|
for (let i = 0; i < numPlots; i++) {
|
|
427
|
-
const group1_id = chart.plots[i]
|
|
428
|
-
const group1_values = chart.plots[i]
|
|
452
|
+
const group1_id = accessors.getGroupId(chart.plots[i]);
|
|
453
|
+
const group1_values = accessors.getGroupValues(chart.plots[i]);
|
|
429
454
|
for (let j = i + 1; j < numPlots; j++) {
|
|
430
|
-
const group2_id = chart.plots[j]
|
|
431
|
-
const group2_values = chart.plots[j]
|
|
432
|
-
|
|
455
|
+
const group2_id = accessors.getGroupId(chart.plots[j]);
|
|
456
|
+
const group2_values = accessors.getGroupValues(chart.plots[j]);
|
|
457
|
+
wilcoxInput.push({ group1_id, group1_values, group2_id, group2_values });
|
|
433
458
|
}
|
|
434
459
|
}
|
|
435
|
-
const
|
|
436
|
-
chart
|
|
437
|
-
for (const test of
|
|
460
|
+
const wilcoxOutput = JSON.parse(await run_rust("wilcoxon", JSON.stringify(wilcoxInput)));
|
|
461
|
+
chart[resultKey] = [];
|
|
462
|
+
for (const test of wilcoxOutput) {
|
|
438
463
|
if (test.pvalue == null || test.pvalue == "null") {
|
|
439
|
-
chart.
|
|
464
|
+
chart[resultKey].push([{ value: test.group1_id }, { value: test.group2_id }, { html: "NA" }]);
|
|
440
465
|
} else {
|
|
441
|
-
chart.
|
|
466
|
+
chart[resultKey].push([
|
|
442
467
|
{ value: test.group1_id },
|
|
443
468
|
{ value: test.group2_id },
|
|
444
469
|
{ html: test.pvalue.toPrecision(4) }
|
|
@@ -447,37 +472,49 @@ async function getBoxPlotWilcoxonData(charts) {
|
|
|
447
472
|
}
|
|
448
473
|
}
|
|
449
474
|
}
|
|
475
|
+
function computeSampleType(data) {
|
|
476
|
+
return `All ${data.sampleType?.plural_name || "samples"}`;
|
|
477
|
+
}
|
|
478
|
+
function buildBins(tw, data, overlayTw, divideTw) {
|
|
479
|
+
const bins = {
|
|
480
|
+
term1: numericBins(tw, data)
|
|
481
|
+
};
|
|
482
|
+
if (overlayTw) bins.term2 = numericBins(overlayTw, data);
|
|
483
|
+
if (divideTw) bins.term0 = numericBins(divideTw, data);
|
|
484
|
+
return bins;
|
|
485
|
+
}
|
|
486
|
+
function extractNumericValues(samples, tw, isLogScale) {
|
|
487
|
+
let values = samples.map((s) => s?.[tw.$id]?.value).filter((v) => typeof v === "number" && !tw.term.values?.[v]?.uncomputable);
|
|
488
|
+
if (isLogScale) values = values.filter((v) => v > 0);
|
|
489
|
+
return values;
|
|
490
|
+
}
|
|
450
491
|
function parseValues(q, data, sampleType, isLog, overlayTw, divideTw) {
|
|
451
492
|
const chart2plot2values = /* @__PURE__ */ new Map();
|
|
452
493
|
const uncomputableValues = {};
|
|
494
|
+
function trackUncomputable(tw, key) {
|
|
495
|
+
if (!tw?.term?.values?.[key]?.uncomputable) return false;
|
|
496
|
+
const label = tw.term.values[key]?.label;
|
|
497
|
+
if (label) uncomputableValues[label] = (uncomputableValues[label] || 0) + 1;
|
|
498
|
+
return true;
|
|
499
|
+
}
|
|
453
500
|
let absMin = Infinity, absMax = -Infinity;
|
|
454
501
|
for (const val of Object.values(data.samples)) {
|
|
455
502
|
const value = val[q.tw.$id];
|
|
456
503
|
if (!Number.isFinite(value?.value)) continue;
|
|
457
|
-
if (q.tw
|
|
458
|
-
const label = q.tw.term.values[value.value].label;
|
|
459
|
-
uncomputableValues[label] = (uncomputableValues[label] || 0) + 1;
|
|
460
|
-
continue;
|
|
461
|
-
}
|
|
504
|
+
if (trackUncomputable(q.tw, value.value)) continue;
|
|
462
505
|
if (isLog && value.value <= 0) continue;
|
|
463
506
|
let chart = "";
|
|
464
507
|
let plot = sampleType;
|
|
465
508
|
if (divideTw) {
|
|
466
|
-
if (!val[divideTw
|
|
509
|
+
if (!val[divideTw.$id]) continue;
|
|
467
510
|
const value0 = val[divideTw.$id];
|
|
468
|
-
|
|
469
|
-
const label = divideTw.term.values[value0?.key]?.label;
|
|
470
|
-
uncomputableValues[label] = (uncomputableValues[label] || 0) + 1;
|
|
471
|
-
}
|
|
511
|
+
trackUncomputable(divideTw, value0.key);
|
|
472
512
|
chart = value0.key;
|
|
473
513
|
}
|
|
474
514
|
if (overlayTw) {
|
|
475
|
-
if (!val[overlayTw
|
|
515
|
+
if (!val[overlayTw.$id]) continue;
|
|
476
516
|
const value2 = val[overlayTw.$id];
|
|
477
|
-
|
|
478
|
-
const label = overlayTw.term.values[value2?.key]?.label;
|
|
479
|
-
uncomputableValues[label] = (uncomputableValues[label] || 0) + 1;
|
|
480
|
-
}
|
|
517
|
+
trackUncomputable(overlayTw, value2.key);
|
|
481
518
|
plot = value2.key;
|
|
482
519
|
}
|
|
483
520
|
if (!chart2plot2values.has(chart)) chart2plot2values.set(chart, /* @__PURE__ */ new Map());
|
|
@@ -493,17 +530,29 @@ function parseValues(q, data, sampleType, isLog, overlayTw, divideTw) {
|
|
|
493
530
|
function numericBins(tw, data) {
|
|
494
531
|
const bins = {};
|
|
495
532
|
if (!isNumericTerm(tw?.term)) return bins;
|
|
496
|
-
for (const bin of data.refs.byTermId[tw
|
|
533
|
+
for (const bin of data.refs.byTermId[tw.$id]?.bins || []) {
|
|
497
534
|
bins[bin.label] = bin;
|
|
498
535
|
}
|
|
499
536
|
return bins;
|
|
500
537
|
}
|
|
501
538
|
export {
|
|
502
539
|
api,
|
|
540
|
+
buildBins,
|
|
541
|
+
computeSampleType,
|
|
542
|
+
divideValues,
|
|
543
|
+
expandNumericTermCollection,
|
|
544
|
+
extractNumericValues,
|
|
503
545
|
getDensities,
|
|
504
546
|
getDensity,
|
|
505
547
|
getViolinWilcoxonData,
|
|
506
548
|
numericBins,
|
|
507
549
|
parseValues,
|
|
550
|
+
setHiddenPlots,
|
|
551
|
+
setIndividualBoxPlotStats,
|
|
552
|
+
setPlotData,
|
|
553
|
+
setScaleData,
|
|
554
|
+
setUncomputableValues,
|
|
555
|
+
setViolinResponse,
|
|
556
|
+
sortObj,
|
|
508
557
|
sortPlot2Values
|
|
509
558
|
};
|