@sjcrh/proteinpaint-server 2.106.0 → 2.108.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/routes/burden.js +114 -51
- package/routes/correlationVolcano.js +20 -2
- package/routes/termdb.DE.js +27 -6
- package/routes/termdb.cluster.js +18 -7
- package/src/app.js +222 -84
- package/utils/burden-ci95.R +134 -0
- package/utils/burden-main.R +46 -0
- package/utils/edge.R +10 -2
- package/utils/getBurden.R +371 -0
- package/utils/gsea.py +9 -4
- package/utils/burden.R +0 -366
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.108.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -60,8 +60,8 @@
|
|
|
60
60
|
"dependencies": {
|
|
61
61
|
"@sjcrh/augen": "2.87.0",
|
|
62
62
|
"@sjcrh/proteinpaint-rust": "2.99.0",
|
|
63
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
64
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
63
|
+
"@sjcrh/proteinpaint-shared": "2.108.0",
|
|
64
|
+
"@sjcrh/proteinpaint-types": "2.108.0",
|
|
65
65
|
"@types/express": "^5.0.0",
|
|
66
66
|
"@types/express-session": "^1.18.1",
|
|
67
67
|
"better-sqlite3": "^9.4.1",
|
package/routes/burden.js
CHANGED
|
@@ -15,6 +15,7 @@ const api = {
|
|
|
15
15
|
}
|
|
16
16
|
}
|
|
17
17
|
};
|
|
18
|
+
const MAXBOOTNUM = 20;
|
|
18
19
|
function init({ genomes }) {
|
|
19
20
|
return async function handler(req, res) {
|
|
20
21
|
try {
|
|
@@ -27,72 +28,137 @@ function init({ genomes }) {
|
|
|
27
28
|
throw `invalid q.genome=${req.query.dslabel}`;
|
|
28
29
|
if (!ds.cohort.cumburden?.files)
|
|
29
30
|
throw `missing ds.cohort.cumburden.files`;
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
if (!ds.cohort?.cumburden?.db)
|
|
32
|
+
throw `missing ds.cohort.cumburden.db`;
|
|
33
|
+
if (!ds.cohort?.cumburden?.bootsubdir)
|
|
34
|
+
throw `missing ds.cohort.cumburden.bootsubdir`;
|
|
35
|
+
const result = await getBurdenResult(q, ds.cohort.cumburden);
|
|
36
|
+
if (!q.showCI) {
|
|
37
|
+
res.send({
|
|
38
|
+
status: "ok",
|
|
39
|
+
/*estimate: result.estimate,*/
|
|
40
|
+
...formatPayload(result.estimate)
|
|
41
|
+
});
|
|
42
|
+
} else {
|
|
43
|
+
if (!result.ci95)
|
|
44
|
+
await compute95ci(result, ds.cohort.cumburden);
|
|
45
|
+
res.send({
|
|
46
|
+
status: "ok",
|
|
47
|
+
/*ci95: result.ci95,*/
|
|
48
|
+
...formatPayload(result.ci95)
|
|
49
|
+
});
|
|
50
|
+
}
|
|
33
51
|
} catch (e) {
|
|
34
52
|
res.send({ status: "error", error: e.message || e });
|
|
35
53
|
}
|
|
36
54
|
};
|
|
37
55
|
}
|
|
38
|
-
async function
|
|
39
|
-
|
|
40
|
-
|
|
56
|
+
async function getBurdenResult(q, cumburden) {
|
|
57
|
+
const { id, jsonInput } = normalizeInput(q, cumburden);
|
|
58
|
+
let result = cumburden.db.connection.prepare("SELECT * FROM estimates WHERE id=?").get(id);
|
|
59
|
+
if (!result) {
|
|
60
|
+
result = { id, status: null, input: jsonInput };
|
|
61
|
+
const estJson = await run_R(path.join(serverconfig.binpath, "utils", "burden-main.R"), jsonInput, []);
|
|
62
|
+
const estimate = JSON.parse(estJson);
|
|
63
|
+
const ages = Object.keys(estimate[0]).filter((k) => k.startsWith("["));
|
|
64
|
+
const overall = { chc: 0 };
|
|
65
|
+
for (const age of ages) {
|
|
66
|
+
overall[age] = [0];
|
|
67
|
+
for (const est of estimate)
|
|
68
|
+
overall[age][0] += est[age];
|
|
69
|
+
}
|
|
70
|
+
estimate.push(overall);
|
|
71
|
+
const burden = {};
|
|
72
|
+
for (const est of estimate) {
|
|
73
|
+
burden[est.chc] = est;
|
|
74
|
+
}
|
|
75
|
+
cumburden.db.connection.prepare("INSERT INTO estimates (id, input, status, estimate) VALUES (?, ?, ?, ?)").run([result.id, jsonInput, 0, JSON.stringify(burden)]);
|
|
76
|
+
result.status = 0;
|
|
77
|
+
result.estimate = burden;
|
|
41
78
|
}
|
|
42
|
-
const
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
`${serverconfig.tpmasterdir}/${fit}`,
|
|
48
|
-
`${serverconfig.tpmasterdir}/${surv}`,
|
|
49
|
-
`${serverconfig.tpmasterdir}/${sample}`
|
|
50
|
-
];
|
|
51
|
-
const estimates = JSON.parse(
|
|
52
|
-
await run_R(path.join(serverconfig.binpath, "utils", "burden.R"), JSON.stringify(data), args)
|
|
53
|
-
);
|
|
54
|
-
return estimates;
|
|
79
|
+
for (const [k, v] of Object.entries(result)) {
|
|
80
|
+
if (k !== "id" && typeof v == "string")
|
|
81
|
+
result[k] = JSON.parse(v);
|
|
82
|
+
}
|
|
83
|
+
return result;
|
|
55
84
|
}
|
|
56
|
-
function
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
-
const
|
|
60
|
-
for (const k of
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
85
|
+
function normalizeInput(q, cumburden) {
|
|
86
|
+
const keys = Object.keys(q).filter((k) => k in defaultInputValues).sort();
|
|
87
|
+
const id = keys.map((k) => q[k]).join("-");
|
|
88
|
+
const normalized = {};
|
|
89
|
+
for (const k of keys)
|
|
90
|
+
normalized[k] = q[k];
|
|
91
|
+
normalized.datafiles = {
|
|
92
|
+
dir: path.join(serverconfig.tpmasterdir, cumburden.dir),
|
|
93
|
+
files: cumburden.files,
|
|
94
|
+
boosubdir: cumburden.bootsubdir
|
|
95
|
+
};
|
|
96
|
+
normalized.binpath = serverconfig.binpath;
|
|
97
|
+
const jsonInput = JSON.stringify(normalized);
|
|
98
|
+
return { id, jsonInput };
|
|
99
|
+
}
|
|
100
|
+
async function compute95ci(result, cumburden) {
|
|
101
|
+
try {
|
|
102
|
+
if (!result.input)
|
|
103
|
+
throw "result{} does not have .input";
|
|
104
|
+
const input = structuredClone(result.input);
|
|
105
|
+
input.burden = Object.values(result.estimate).filter((est) => est.chc !== 0);
|
|
106
|
+
const lowup = await run_R(path.join(serverconfig.binpath, "utils", "burden-ci95.R"), JSON.stringify(input), []);
|
|
107
|
+
const { low, up, overall } = JSON.parse(lowup);
|
|
108
|
+
const ci95 = { 0: {} };
|
|
109
|
+
for (const est of Object.values(result.estimate)) {
|
|
110
|
+
if (!ci95[est.chc])
|
|
111
|
+
ci95[est.chc] = {};
|
|
112
|
+
const lower = low.find((l) => l.chc === est.chc);
|
|
113
|
+
const upper = up.find((u) => u.chc === est.chc);
|
|
114
|
+
for (const [age, val] of Object.entries(est)) {
|
|
115
|
+
if (!age.startsWith("["))
|
|
116
|
+
continue;
|
|
117
|
+
const burden = est.chc === 0 ? overall[0][age] : val;
|
|
118
|
+
ci95[est.chc][age] = [burden, lower[age], upper[age]];
|
|
69
119
|
}
|
|
70
120
|
}
|
|
121
|
+
result.ci95 = ci95;
|
|
122
|
+
} catch (e) {
|
|
123
|
+
console.log(e);
|
|
71
124
|
}
|
|
125
|
+
await cumburden.db.connection.prepare(`UPDATE estimates SET ci95=? WHERE id=?`).run(JSON.stringify(result.ci95), result.id);
|
|
126
|
+
}
|
|
127
|
+
function sortNumericValue(a, b) {
|
|
128
|
+
return a < b ? -1 : 1;
|
|
129
|
+
}
|
|
130
|
+
function formatPayload(estimates) {
|
|
131
|
+
const rawKeys = Object.keys(estimates["1"]).filter((k) => k.startsWith("["));
|
|
132
|
+
const renamedKeys = rawKeys.map((k) => `burden${k.split(",")[0].slice(1)}`);
|
|
133
|
+
const outKeys = ["chc", ...renamedKeys];
|
|
72
134
|
const rows = [];
|
|
73
|
-
for (const
|
|
74
|
-
|
|
135
|
+
for (const [chc, burdenByAge] of Object.entries(estimates)) {
|
|
136
|
+
const arr = [chc];
|
|
137
|
+
for (const age of rawKeys)
|
|
138
|
+
arr.push(Array.isArray(burdenByAge[age]) ? burdenByAge[age] : [burdenByAge[age]]);
|
|
139
|
+
rows.push(arr);
|
|
75
140
|
}
|
|
76
141
|
return { keys: outKeys, rows };
|
|
77
142
|
}
|
|
78
|
-
const
|
|
143
|
+
const defaultInputValues = Object.freeze({
|
|
144
|
+
// showCI: false, do not track so it's not computed as part of unique ID
|
|
79
145
|
diaggrp: 5,
|
|
80
|
-
sex:
|
|
146
|
+
sex: 1,
|
|
81
147
|
white: 1,
|
|
82
|
-
agedx:
|
|
148
|
+
agedx: 6,
|
|
83
149
|
// chemotherapy
|
|
84
150
|
steriod: 0,
|
|
85
151
|
bleo: 0,
|
|
86
|
-
vcr:
|
|
87
|
-
//
|
|
88
|
-
etop:
|
|
89
|
-
//
|
|
152
|
+
vcr: 12,
|
|
153
|
+
// Vincristine
|
|
154
|
+
etop: 2500,
|
|
155
|
+
// Etoposide
|
|
90
156
|
itmt: 0,
|
|
91
157
|
// Intrathecal methothrexate_grp: 0,
|
|
92
|
-
ced:
|
|
93
|
-
//
|
|
94
|
-
cisp:
|
|
95
|
-
//
|
|
158
|
+
ced: 1.6,
|
|
159
|
+
// Cyclophosphamide, 0.7692 mean 7692.
|
|
160
|
+
cisp: 300,
|
|
161
|
+
// Cisplatin
|
|
96
162
|
dox: 0,
|
|
97
163
|
// Anthracycline, 3 mean 300 ml/m2
|
|
98
164
|
carbo: 0,
|
|
@@ -100,14 +166,11 @@ const defaults = Object.freeze({
|
|
|
100
166
|
hdmtx: 0,
|
|
101
167
|
// High-Dose Methotrexate
|
|
102
168
|
// radiation
|
|
103
|
-
brain:
|
|
104
|
-
|
|
105
|
-
chest: 0,
|
|
106
|
-
//2.4,
|
|
169
|
+
brain: 5.4,
|
|
170
|
+
chest: 2.4,
|
|
107
171
|
heart: 0,
|
|
108
172
|
pelvis: 0,
|
|
109
|
-
abd:
|
|
110
|
-
//2.4
|
|
173
|
+
abd: 2.4
|
|
111
174
|
});
|
|
112
175
|
export {
|
|
113
176
|
api
|
|
@@ -3,7 +3,10 @@ import { getData } from "../src/termdb.matrix.js";
|
|
|
3
3
|
import run_R from "../src/run_R.js";
|
|
4
4
|
import serverconfig from "../src/serverconfig.js";
|
|
5
5
|
import { mayLog } from "#src/helpers.ts";
|
|
6
|
+
import { stdDev } from "#shared/violin.bins.js";
|
|
6
7
|
import path from "path";
|
|
8
|
+
const minArrayLength = 3;
|
|
9
|
+
const minSD = 0.05;
|
|
7
10
|
const api = {
|
|
8
11
|
endpoint: "termdb/correlationVolcano",
|
|
9
12
|
methods: {
|
|
@@ -65,16 +68,31 @@ async function compute(q, ds, genome) {
|
|
|
65
68
|
vtid2array.get(tw.$id).v2.push(variableValue);
|
|
66
69
|
}
|
|
67
70
|
}
|
|
71
|
+
const [acceptedVariables, skippedVariables] = Array.from(vtid2array.values()).reduce(
|
|
72
|
+
([accepted, skipped], t) => {
|
|
73
|
+
const grterThanOne = t.v1.length > minArrayLength && t.v2.length > minArrayLength;
|
|
74
|
+
const significantSD = stdDev(t.v1) > minSD && stdDev(t.v2) > minSD;
|
|
75
|
+
const v = grterThanOne && significantSD ? accepted : skipped;
|
|
76
|
+
if (v === accepted)
|
|
77
|
+
accepted.push(t);
|
|
78
|
+
if (v === skipped)
|
|
79
|
+
skipped.push({ tw$id: t.id });
|
|
80
|
+
return [accepted, skipped];
|
|
81
|
+
},
|
|
82
|
+
[[], []]
|
|
83
|
+
);
|
|
84
|
+
const result = { skippedVariables, variableItems: [] };
|
|
85
|
+
if (!acceptedVariables.length)
|
|
86
|
+
return result;
|
|
68
87
|
const input = {
|
|
69
88
|
method: q.correlationMethod || "pearson",
|
|
70
|
-
terms:
|
|
89
|
+
terms: acceptedVariables
|
|
71
90
|
};
|
|
72
91
|
const time1 = Date.now();
|
|
73
92
|
const output = {
|
|
74
93
|
terms: JSON.parse(await run_R(path.join(serverconfig.binpath, "utils", "corr.R"), JSON.stringify(input)))
|
|
75
94
|
};
|
|
76
95
|
mayLog("Time taken to run correlation analysis:", Date.now() - time1);
|
|
77
|
-
const result = { variableItems: [] };
|
|
78
96
|
for (const t of output.terms) {
|
|
79
97
|
const t2 = {
|
|
80
98
|
tw$id: t.id,
|
package/routes/termdb.DE.js
CHANGED
|
@@ -58,12 +58,14 @@ async function run_DE(param, ds, term_results) {
|
|
|
58
58
|
throw "samplelst.groups[0].values.length<1";
|
|
59
59
|
if (param.samplelst.groups[1].values?.length < 1)
|
|
60
60
|
throw "samplelst.groups[1].values.length<1";
|
|
61
|
-
param.storage_type = ds.queries.rnaseqGeneCount.storage_type;
|
|
62
61
|
const q = ds.queries.rnaseqGeneCount;
|
|
63
62
|
if (!q)
|
|
64
63
|
return;
|
|
65
64
|
if (!q.file)
|
|
66
65
|
throw "unknown data type for rnaseqGeneCount";
|
|
66
|
+
if (!q.storage_type)
|
|
67
|
+
throw "storage_type is not defined";
|
|
68
|
+
param.storage_type = q.storage_type;
|
|
67
69
|
const group1names = [];
|
|
68
70
|
const conf1_group1 = [];
|
|
69
71
|
for (const s of param.samplelst.groups[0].values) {
|
|
@@ -73,9 +75,17 @@ async function run_DE(param, ds, term_results) {
|
|
|
73
75
|
if (!n)
|
|
74
76
|
continue;
|
|
75
77
|
if (q.allSampleSet.has(n)) {
|
|
76
|
-
group1names.push(n);
|
|
77
78
|
if (param.tw) {
|
|
78
|
-
|
|
79
|
+
if (term_results.samples[s.sampleId]) {
|
|
80
|
+
if (param.tw.q.mode == "continuous") {
|
|
81
|
+
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
82
|
+
} else {
|
|
83
|
+
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
84
|
+
}
|
|
85
|
+
group1names.push(n);
|
|
86
|
+
}
|
|
87
|
+
} else {
|
|
88
|
+
group1names.push(n);
|
|
79
89
|
}
|
|
80
90
|
} else {
|
|
81
91
|
}
|
|
@@ -89,9 +99,17 @@ async function run_DE(param, ds, term_results) {
|
|
|
89
99
|
if (!n)
|
|
90
100
|
continue;
|
|
91
101
|
if (q.allSampleSet.has(n)) {
|
|
92
|
-
group2names.push(n);
|
|
93
102
|
if (param.tw) {
|
|
94
|
-
|
|
103
|
+
if (term_results.samples[s.sampleId]) {
|
|
104
|
+
if (param.tw.q.mode == "continuous") {
|
|
105
|
+
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
106
|
+
} else {
|
|
107
|
+
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
108
|
+
}
|
|
109
|
+
group2names.push(n);
|
|
110
|
+
}
|
|
111
|
+
} else {
|
|
112
|
+
group2names.push(n);
|
|
95
113
|
}
|
|
96
114
|
} else {
|
|
97
115
|
}
|
|
@@ -115,7 +133,10 @@ async function run_DE(param, ds, term_results) {
|
|
|
115
133
|
};
|
|
116
134
|
if (param.tw) {
|
|
117
135
|
expression_input.conf1 = [...conf1_group2, ...conf1_group1];
|
|
118
|
-
expression_input.
|
|
136
|
+
expression_input.conf1_mode = param.tw.q.mode;
|
|
137
|
+
if (new Set(expression_input.conf1).size === 1) {
|
|
138
|
+
throw "Confounding variable has only one value";
|
|
139
|
+
}
|
|
119
140
|
}
|
|
120
141
|
const sample_size_limit = 8;
|
|
121
142
|
let result;
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -9,6 +9,7 @@ import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
|
|
|
9
9
|
import { getResult as getResultGene } from "#src/gene.js";
|
|
10
10
|
import { TermTypes, NUMERIC_DICTIONARY_TERM } from "#shared/terms.js";
|
|
11
11
|
import { getData } from "#src/termdb.matrix.js";
|
|
12
|
+
import { termType2label } from "#shared/terms.js";
|
|
12
13
|
const api = {
|
|
13
14
|
endpoint: "termdb/cluster",
|
|
14
15
|
methods: {
|
|
@@ -106,14 +107,24 @@ async function getNumericDictTermAnnotation(q, ds, genome) {
|
|
|
106
107
|
}
|
|
107
108
|
async function doClustering(data, q, numCases = 1e3) {
|
|
108
109
|
const sampleSet = /* @__PURE__ */ new Set();
|
|
110
|
+
let firstTerm = true;
|
|
109
111
|
for (const o of data.values()) {
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
112
|
+
const currentSampleIds = new Set(Object.keys(o));
|
|
113
|
+
if (firstTerm) {
|
|
114
|
+
currentSampleIds.forEach((id) => sampleSet.add(id));
|
|
115
|
+
firstTerm = false;
|
|
116
|
+
} else {
|
|
117
|
+
for (const id of sampleSet) {
|
|
118
|
+
if (!currentSampleIds.has(id)) {
|
|
119
|
+
sampleSet.delete(id);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
114
123
|
}
|
|
115
124
|
if (sampleSet.size == 0)
|
|
116
|
-
throw
|
|
125
|
+
throw `termdb.cluster: There are no overlapping tested samples shared across the selected ${termType2label(
|
|
126
|
+
q.dataType
|
|
127
|
+
)}`;
|
|
117
128
|
if (!clusterMethodLst.find((i) => i.value == q.clusterMethod))
|
|
118
129
|
throw "Invalid cluster method";
|
|
119
130
|
if (!distanceMethodLst.find((i) => i.value == q.distanceMethod))
|
|
@@ -122,7 +133,7 @@ async function doClustering(data, q, numCases = 1e3) {
|
|
|
122
133
|
matrix: [],
|
|
123
134
|
row_names: [],
|
|
124
135
|
// genes
|
|
125
|
-
col_names: [...sampleSet],
|
|
136
|
+
col_names: [...sampleSet].slice(0, numCases),
|
|
126
137
|
// samples
|
|
127
138
|
cluster_method: q.clusterMethod,
|
|
128
139
|
distance_method: q.distanceMethod,
|
|
@@ -133,7 +144,7 @@ async function doClustering(data, q, numCases = 1e3) {
|
|
|
133
144
|
inputData.row_names.push(gene);
|
|
134
145
|
const row = [];
|
|
135
146
|
for (const s of inputData.col_names) {
|
|
136
|
-
row.push(o[s]
|
|
147
|
+
row.push(o[s]);
|
|
137
148
|
}
|
|
138
149
|
inputData.matrix.push(q.zScoreTransformation ? getZscore(row) : row);
|
|
139
150
|
}
|