@sjcrh/proteinpaint-server 2.184.1-0 → 2.185.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/protected.test.js +5 -0
- package/dataset/termdb.test.js +1 -1
- package/package.json +5 -5
- package/routes/brainImagingSamples.js +15 -4
- package/routes/genesetEnrichment.js +101 -42
- package/routes/profile.radar2.js +112 -0
- package/routes/profile.radarFacility2.js +148 -0
- package/routes/saveWSIAnnotation.js +21 -0
- package/routes/termdb.DE.js +31 -238
- package/routes/termdb.cluster.js +44 -9
- package/routes/termdb.config.js +1 -0
- package/routes/termdb.diffMeth.js +4 -2
- package/routes/termdb.proteome.js +28 -20
- package/routes/termdb.singlecellDEgenes.js +2 -1
- package/routes/termdb.singlecellSamples.js +36 -5
- package/src/app.js +1387 -649
- package/src/serverconfig.js +16 -1
package/routes/termdb.DE.js
CHANGED
|
@@ -1,15 +1,11 @@
|
|
|
1
|
-
import fs from "fs";
|
|
2
1
|
import path from "path";
|
|
3
2
|
import { diffExpPayload } from "#types/checkers";
|
|
4
|
-
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
5
|
-
import { getData } from "../src/termdb.matrix.js";
|
|
6
|
-
import { get_ds_tdb } from "../src/termdb.js";
|
|
7
|
-
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
8
3
|
import { mayLog } from "#src/helpers.ts";
|
|
9
4
|
import serverconfig from "../src/serverconfig.js";
|
|
10
|
-
import { imageSize } from "image-size";
|
|
11
5
|
import { get_header_txt } from "#src/utils.js";
|
|
12
|
-
import {
|
|
6
|
+
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
7
|
+
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
8
|
+
import { readCacheFileOrRecompute, resolveDeContext, resolveSampleGroups } from "../src/diffAnalysis.ts";
|
|
13
9
|
const api = {
|
|
14
10
|
endpoint: "termdb/DE",
|
|
15
11
|
methods: {
|
|
@@ -27,244 +23,41 @@ function init({ genomes }) {
|
|
|
27
23
|
return async (req, res) => {
|
|
28
24
|
try {
|
|
29
25
|
const q = req.query;
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
);
|
|
44
|
-
if (term_results.error) throw new Error(term_results.error);
|
|
45
|
-
}
|
|
46
|
-
let term_results2 = [];
|
|
47
|
-
if (q.tw2) {
|
|
48
|
-
const terms2 = [q.tw2];
|
|
49
|
-
term_results2 = await getData(
|
|
50
|
-
{
|
|
51
|
-
filter: q.filter,
|
|
52
|
-
filter0: q.filter0,
|
|
53
|
-
terms: terms2
|
|
54
|
-
},
|
|
55
|
-
ds
|
|
56
|
-
);
|
|
57
|
-
if (term_results2.error) throw new Error(term_results2.error);
|
|
26
|
+
if (q.preAnalysis) {
|
|
27
|
+
const { ds, term_results, term_results2 } = await resolveDeContext(q, genomes);
|
|
28
|
+
const groups = resolveSampleGroups(q, ds, term_results, term_results2);
|
|
29
|
+
const group1Name = q.samplelst.groups[0].name;
|
|
30
|
+
const group2Name = q.samplelst.groups[1].name;
|
|
31
|
+
res.send({
|
|
32
|
+
data: {
|
|
33
|
+
[group1Name]: groups.group1names.length,
|
|
34
|
+
[group2Name]: groups.group2names.length,
|
|
35
|
+
...groups.alerts.length ? { alert: groups.alerts.join(" | ") } : {}
|
|
36
|
+
}
|
|
37
|
+
});
|
|
38
|
+
return;
|
|
58
39
|
}
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
|
|
40
|
+
const { cacheId, geneData, sample_size1, sample_size2, method, images, bcv } = await readCacheFileOrRecompute({
|
|
41
|
+
daRequest: q,
|
|
42
|
+
genomes
|
|
43
|
+
});
|
|
44
|
+
const rendered = await renderVolcano(geneData, q.volcanoRender);
|
|
45
|
+
rendered.cacheId = cacheId;
|
|
46
|
+
const output = {
|
|
47
|
+
data: rendered,
|
|
48
|
+
sample_size1,
|
|
49
|
+
sample_size2,
|
|
50
|
+
method,
|
|
51
|
+
images
|
|
52
|
+
};
|
|
53
|
+
if (bcv != null) output.bcv = bcv;
|
|
54
|
+
res.send(output);
|
|
62
55
|
} catch (e) {
|
|
63
56
|
res.send({ status: "error", error: e.message || e });
|
|
64
57
|
if (e instanceof Error && e.stack) console.log(e);
|
|
65
58
|
}
|
|
66
59
|
};
|
|
67
60
|
}
|
|
68
|
-
async function run_DE(param, ds, term_results, term_results2) {
|
|
69
|
-
if (param.samplelst?.groups?.length != 2) throw new Error(".samplelst.groups.length!=2");
|
|
70
|
-
if (param.samplelst.groups[0].values?.length < 1) throw new Error("samplelst.groups[0].values.length<1");
|
|
71
|
-
if (param.samplelst.groups[1].values?.length < 1) throw new Error("samplelst.groups[1].values.length<1");
|
|
72
|
-
const q = ds.queries.rnaseqGeneCount;
|
|
73
|
-
if (!q) return;
|
|
74
|
-
if (!q.file) throw new Error("unknown data type for rnaseqGeneCount");
|
|
75
|
-
if (!q.storage_type) throw new Error("storage_type is not defined");
|
|
76
|
-
param.storage_type = q.storage_type;
|
|
77
|
-
const group1names = [];
|
|
78
|
-
const conf1_group1 = [];
|
|
79
|
-
const conf2_group1 = [];
|
|
80
|
-
for (const s of param.samplelst.groups[0].values) {
|
|
81
|
-
if (!Number.isInteger(s.sampleId)) continue;
|
|
82
|
-
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
83
|
-
if (!n) continue;
|
|
84
|
-
if (q.allSampleSet.has(n)) {
|
|
85
|
-
if (param.tw && !param.tw2) {
|
|
86
|
-
if (term_results.samples[s.sampleId]) {
|
|
87
|
-
if (param.tw.q.mode == "continuous") {
|
|
88
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
89
|
-
} else {
|
|
90
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
91
|
-
}
|
|
92
|
-
group1names.push(n);
|
|
93
|
-
}
|
|
94
|
-
} else if (!param.tw && param.tw2) {
|
|
95
|
-
if (term_results2.samples[s.sampleId]) {
|
|
96
|
-
if (param.tw2.q.mode == "continuous") {
|
|
97
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
98
|
-
} else {
|
|
99
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
100
|
-
}
|
|
101
|
-
group1names.push(n);
|
|
102
|
-
}
|
|
103
|
-
} else if (param.tw && param.tw2) {
|
|
104
|
-
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
105
|
-
if (param.tw.q.mode == "continuous") {
|
|
106
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
107
|
-
} else {
|
|
108
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
109
|
-
}
|
|
110
|
-
if (param.tw2.q.mode == "continuous") {
|
|
111
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
112
|
-
} else {
|
|
113
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
114
|
-
}
|
|
115
|
-
group1names.push(n);
|
|
116
|
-
}
|
|
117
|
-
} else {
|
|
118
|
-
group1names.push(n);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
const group2names = [];
|
|
123
|
-
const conf1_group2 = [];
|
|
124
|
-
const conf2_group2 = [];
|
|
125
|
-
for (const s of param.samplelst.groups[1].values) {
|
|
126
|
-
if (!Number.isInteger(s.sampleId)) continue;
|
|
127
|
-
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
128
|
-
if (!n) continue;
|
|
129
|
-
if (q.allSampleSet.has(n)) {
|
|
130
|
-
if (param.tw && !param.tw2) {
|
|
131
|
-
if (term_results.samples[s.sampleId]) {
|
|
132
|
-
if (param.tw.q.mode == "continuous") {
|
|
133
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
134
|
-
} else {
|
|
135
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
136
|
-
}
|
|
137
|
-
group2names.push(n);
|
|
138
|
-
}
|
|
139
|
-
} else if (!param.tw && param.tw2) {
|
|
140
|
-
if (term_results2.samples[s.sampleId]) {
|
|
141
|
-
if (param.tw2.q.mode == "continuous") {
|
|
142
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
143
|
-
} else {
|
|
144
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
145
|
-
}
|
|
146
|
-
group2names.push(n);
|
|
147
|
-
}
|
|
148
|
-
} else if (param.tw && param.tw2) {
|
|
149
|
-
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
150
|
-
if (param.tw.q.mode == "continuous") {
|
|
151
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
152
|
-
} else {
|
|
153
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
154
|
-
}
|
|
155
|
-
if (param.tw2.q.mode == "continuous") {
|
|
156
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
157
|
-
} else {
|
|
158
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
159
|
-
}
|
|
160
|
-
group2names.push(n);
|
|
161
|
-
}
|
|
162
|
-
} else {
|
|
163
|
-
group2names.push(n);
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
const sample_size1 = group1names.length;
|
|
168
|
-
const sample_size2 = group2names.length;
|
|
169
|
-
const alerts = validateGroups(sample_size1, sample_size2, group1names, group2names);
|
|
170
|
-
if (param.preAnalysis) {
|
|
171
|
-
const group1Name = param.samplelst.groups[0].name;
|
|
172
|
-
const group2Name = param.samplelst.groups[1].name;
|
|
173
|
-
return {
|
|
174
|
-
data: {
|
|
175
|
-
[group1Name]: sample_size1,
|
|
176
|
-
[group2Name]: sample_size2,
|
|
177
|
-
...alerts.length ? { alert: alerts.join(" | ") } : {}
|
|
178
|
-
}
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
if (alerts.length) throw new Error(alerts.join(" | "));
|
|
182
|
-
const cases_string = group2names.map((i) => i).join(",");
|
|
183
|
-
const controls_string = group1names.map((i) => i).join(",");
|
|
184
|
-
const expression_input = {
|
|
185
|
-
case: cases_string,
|
|
186
|
-
control: controls_string,
|
|
187
|
-
data_type: "do_DE",
|
|
188
|
-
input_file: q.file,
|
|
189
|
-
cachedir: serverconfig.cachedir,
|
|
190
|
-
min_count: param.min_count,
|
|
191
|
-
min_total_count: param.min_total_count,
|
|
192
|
-
cpm_cutoff: param.cpm_cutoff,
|
|
193
|
-
storage_type: param.storage_type,
|
|
194
|
-
DE_method: param.method,
|
|
195
|
-
mds_cutoff: 1e4
|
|
196
|
-
// If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive. Number of genes * Number of samples < mds_cutoff for mds generation
|
|
197
|
-
};
|
|
198
|
-
if (param.tw) {
|
|
199
|
-
expression_input.conf1 = [...conf1_group2, ...conf1_group1];
|
|
200
|
-
expression_input.conf1_mode = param.tw.q.mode;
|
|
201
|
-
if (new Set(expression_input.conf1).size === 1) {
|
|
202
|
-
throw new Error("Confounding variable 1 has only one value");
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
if (param.tw2) {
|
|
206
|
-
expression_input.conf2 = [...conf2_group2, ...conf2_group1];
|
|
207
|
-
expression_input.conf2_mode = param.tw2.q.mode;
|
|
208
|
-
if (new Set(expression_input.conf2).size === 1) {
|
|
209
|
-
throw new Error("Confounding variable 2 has only one value");
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
const sample_size_limit = 8;
|
|
213
|
-
if (group1names.length <= sample_size_limit && group2names.length <= sample_size_limit || param.method == "edgeR" || param.method == "limma") {
|
|
214
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
215
|
-
const result2 = JSON.parse(await run_R("edge_newh5.R", JSON.stringify(expression_input)));
|
|
216
|
-
mayLog("Time taken to run edgeR:", formatElapsedTime(Date.now() - time12));
|
|
217
|
-
param.method = "edgeR";
|
|
218
|
-
const ql_imagePath = path.join(serverconfig.cachedir, result2.edgeR_ql_image_name[0]);
|
|
219
|
-
mayLog("ql_imagePath:", ql_imagePath);
|
|
220
|
-
await readFileAndDelete(ql_imagePath, "ql_image", result2);
|
|
221
|
-
if (result2.edgeR_mds_image_name) {
|
|
222
|
-
const mds_imagePath = path.join(serverconfig.cachedir, result2.edgeR_mds_image_name[0]);
|
|
223
|
-
mayLog("mds_imagePath:", mds_imagePath);
|
|
224
|
-
await readFileAndDelete(mds_imagePath, "mds_image", result2);
|
|
225
|
-
}
|
|
226
|
-
const images = [result2.ql_image];
|
|
227
|
-
if (result2.mds_image) images.push(result2.mds_image);
|
|
228
|
-
const output = {
|
|
229
|
-
data: result2.gene_data,
|
|
230
|
-
sample_size2: result2.num_cases[0],
|
|
231
|
-
sample_size1: result2.num_controls[0],
|
|
232
|
-
method: param.method,
|
|
233
|
-
images
|
|
234
|
-
};
|
|
235
|
-
if (result2.bcv && result2.bcv[0] !== null && result2.bcv[0] !== void 0) {
|
|
236
|
-
output.bcv = result2.bcv[0];
|
|
237
|
-
}
|
|
238
|
-
return output;
|
|
239
|
-
}
|
|
240
|
-
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
241
|
-
const result = JSON.parse(await run_rust("DEanalysis", JSON.stringify(expression_input)));
|
|
242
|
-
mayLog("Time taken to run rust DE pipeline:", formatElapsedTime(Date.now() - time1));
|
|
243
|
-
param.method = "wilcoxon";
|
|
244
|
-
return { data: result, sample_size1, sample_size2, method: param.method };
|
|
245
|
-
}
|
|
246
|
-
function validateGroups(sample_size1, sample_size2, group1names, group2names) {
|
|
247
|
-
const alerts = [];
|
|
248
|
-
if (sample_size1 < 1) alerts.push("sample size of group1 < 1");
|
|
249
|
-
if (sample_size2 < 1) alerts.push("sample size of group2 < 1");
|
|
250
|
-
const commonnames = group1names.filter((x) => group2names.includes(x));
|
|
251
|
-
if (commonnames.length) alerts.push(`Common elements found between both groups: ${commonnames.join(", ")}`);
|
|
252
|
-
return alerts;
|
|
253
|
-
}
|
|
254
|
-
async function readFileAndDelete(file, key, response) {
|
|
255
|
-
const plot = await fs.promises.readFile(file);
|
|
256
|
-
const plotBuffer = Buffer.from(plot).toString("base64");
|
|
257
|
-
const { width, height } = imageSize(file);
|
|
258
|
-
const obj = {
|
|
259
|
-
src: `data:image/png;base64,${plotBuffer}`,
|
|
260
|
-
size: `${width}x${height}`,
|
|
261
|
-
key
|
|
262
|
-
};
|
|
263
|
-
response[key] = obj;
|
|
264
|
-
fs.unlink(file, (err) => {
|
|
265
|
-
if (err) throw new Error(err.message || String(err));
|
|
266
|
-
});
|
|
267
|
-
}
|
|
268
61
|
async function validate_query_rnaseqGeneCount(ds) {
|
|
269
62
|
const q = ds.queries.rnaseqGeneCount;
|
|
270
63
|
if (!q) return;
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -7,8 +7,15 @@ import serverconfig from "#src/serverconfig.js";
|
|
|
7
7
|
import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
|
|
8
8
|
import { mayLimitSamples } from "#src/mds3.filter.js";
|
|
9
9
|
import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
10
|
+
import { getData } from "#src/termdb.matrix.js";
|
|
11
|
+
import {
|
|
12
|
+
GENE_EXPRESSION,
|
|
13
|
+
METABOLITE_INTENSITY,
|
|
14
|
+
NUMERIC_DICTIONARY_TERM,
|
|
15
|
+
termType2label,
|
|
16
|
+
ISOFORM_EXPRESSION,
|
|
17
|
+
PROTEOME_ABUNDANCE
|
|
18
|
+
} from "#shared/terms.js";
|
|
12
19
|
import { formatElapsedTime } from "#shared/time.js";
|
|
13
20
|
const api = {
|
|
14
21
|
endpoint: "termdb/cluster",
|
|
@@ -34,8 +41,9 @@ function init({ genomes }) {
|
|
|
34
41
|
if (!ds) throw "invalid dataset name";
|
|
35
42
|
if (ds.label === "GDC" && !ds.__gdc?.doneCaching)
|
|
36
43
|
throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
|
|
37
|
-
if ([
|
|
38
|
-
if (!ds.queries?.[q.dataType]
|
|
44
|
+
if ([GENE_EXPRESSION, ISOFORM_EXPRESSION, METABOLITE_INTENSITY, NUMERIC_DICTIONARY_TERM].includes(q.dataType)) {
|
|
45
|
+
if (!ds.queries?.[q.dataType] && q.dataType !== NUMERIC_DICTIONARY_TERM)
|
|
46
|
+
throw `no ${q.dataType} data on this dataset`;
|
|
39
47
|
if (!q.terms) throw `missing gene list`;
|
|
40
48
|
if (!Array.isArray(q.terms)) throw `gene list is not an array`;
|
|
41
49
|
if (q.terms.length < 3)
|
|
@@ -43,7 +51,7 @@ function init({ genomes }) {
|
|
|
43
51
|
result = await getResult(q, ds);
|
|
44
52
|
} else if (PROTEOME_ABUNDANCE == q.dataType) {
|
|
45
53
|
const proteomeQuery = ds.queries?.proteome;
|
|
46
|
-
if (!proteomeQuery?.get) throw `no ${
|
|
54
|
+
if (!proteomeQuery?.get) throw `no ${PROTEOME_ABUNDANCE} data getter on this dataset`;
|
|
47
55
|
if (!q.terms) throw `missing gene list`;
|
|
48
56
|
if (!Array.isArray(q.terms)) throw `gene list is not an array`;
|
|
49
57
|
if (q.terms.length < 3)
|
|
@@ -64,13 +72,16 @@ function init({ genomes }) {
|
|
|
64
72
|
}
|
|
65
73
|
async function getResult(q, ds) {
|
|
66
74
|
let _q = q;
|
|
67
|
-
if (q.dataType ==
|
|
75
|
+
if (q.dataType == GENE_EXPRESSION) {
|
|
68
76
|
_q = JSON.parse(JSON.stringify(q));
|
|
69
77
|
_q.forClusteringAnalysis = true;
|
|
70
78
|
_q.__abortSignal = q.__abortSignal;
|
|
71
79
|
}
|
|
72
80
|
let term2sample2value, byTermId, bySampleId, skippedSexChrGenes;
|
|
73
|
-
if (q.dataType ==
|
|
81
|
+
if (q.dataType == NUMERIC_DICTIONARY_TERM) {
|
|
82
|
+
;
|
|
83
|
+
({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
|
|
84
|
+
} else if (q.dataType == PROTEOME_ABUNDANCE) {
|
|
74
85
|
;
|
|
75
86
|
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get(_q));
|
|
76
87
|
} else {
|
|
@@ -90,7 +101,7 @@ async function getResult(q, ds) {
|
|
|
90
101
|
const removedHierClusterTerms = [];
|
|
91
102
|
if (noValueTerms.length) {
|
|
92
103
|
removedHierClusterTerms.push({
|
|
93
|
-
text: `Skipped ${q.dataType ==
|
|
104
|
+
text: `Skipped ${q.dataType == GENE_EXPRESSION ? "genes" : "items"} with no data`,
|
|
94
105
|
lst: noValueTerms
|
|
95
106
|
});
|
|
96
107
|
}
|
|
@@ -100,7 +111,7 @@ async function getResult(q, ds) {
|
|
|
100
111
|
if (term2sample2value.size == 0) throw "no data";
|
|
101
112
|
if (term2sample2value.size == 1) {
|
|
102
113
|
const g = Array.from(term2sample2value.keys())[0];
|
|
103
|
-
return { term: { gene: g, type:
|
|
114
|
+
return { term: { gene: g, type: GENE_EXPRESSION }, data: term2sample2value.get(g) };
|
|
104
115
|
}
|
|
105
116
|
const t = Date.now();
|
|
106
117
|
const clustering = await doClustering(term2sample2value, q, Object.keys(bySampleId).length);
|
|
@@ -109,6 +120,29 @@ async function getResult(q, ds) {
|
|
|
109
120
|
if (removedHierClusterTerms.length) result.removedHierClusterTerms = removedHierClusterTerms;
|
|
110
121
|
return result;
|
|
111
122
|
}
|
|
123
|
+
async function getNumericDictTermAnnotation(q, ds) {
|
|
124
|
+
const getDataArgs = {
|
|
125
|
+
// TODO: figure out when term is not a termwrapper
|
|
126
|
+
terms: q.terms.map((tw) => tw.term ? tw : { term: tw, q: { mode: "continuous" } }),
|
|
127
|
+
filter: q.filter,
|
|
128
|
+
filter0: q.filter0,
|
|
129
|
+
__protected__: q.__protected__
|
|
130
|
+
};
|
|
131
|
+
const data = await getData(getDataArgs, ds);
|
|
132
|
+
if (data.error) throw data.error;
|
|
133
|
+
const term2sample2value = /* @__PURE__ */ new Map();
|
|
134
|
+
for (const [key, sampleData] of Object.entries(data.samples)) {
|
|
135
|
+
for (const [term, value] of Object.entries(sampleData)) {
|
|
136
|
+
if (term !== "sample") {
|
|
137
|
+
if (!term2sample2value.has(term)) {
|
|
138
|
+
term2sample2value.set(term, {});
|
|
139
|
+
}
|
|
140
|
+
term2sample2value.get(term)[key] = value.value;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
return { term2sample2value, byTermId: data.refs.byTermId, bySampleId: data.refs.bySampleId };
|
|
145
|
+
}
|
|
112
146
|
async function doClustering(data, q, numCases = 1e3) {
|
|
113
147
|
const sampleSet = /* @__PURE__ */ new Set();
|
|
114
148
|
let firstTerm = true;
|
|
@@ -297,6 +331,7 @@ async function validateNative(q, ds) {
|
|
|
297
331
|
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
298
332
|
if (!sampleId) continue;
|
|
299
333
|
if (limitSamples && !limitSamples.has(sampleId)) continue;
|
|
334
|
+
if (!Number.isFinite(samplesData[sampleName])) continue;
|
|
300
335
|
s2v[sampleId] = samplesData[sampleName];
|
|
301
336
|
}
|
|
302
337
|
if (Object.keys(s2v).length) {
|
package/routes/termdb.config.js
CHANGED
|
@@ -52,6 +52,7 @@ function make(q, req, res, ds, genome) {
|
|
|
52
52
|
dataDownloadCatch: tdb.dataDownloadCatch,
|
|
53
53
|
matrix: tdb.matrix,
|
|
54
54
|
hierCluster: tdb.hierCluster,
|
|
55
|
+
numericDictTermCluster: tdb.numericDictTermCluster,
|
|
55
56
|
mclass: tdb.mclass,
|
|
56
57
|
alwaysRefillCategoricalTermValues: tdb.alwaysRefillCategoricalTermValues,
|
|
57
58
|
isGeneSetTermdb: tdb.isGeneSetTermdb,
|
package/routes/termdb.diffMeth.js
CHANGED
|
@@ -3,6 +3,7 @@ import { getData } from "../src/termdb.matrix.js";
|
|
|
3
3
|
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
4
4
|
import { mayLog } from "#src/helpers.ts";
|
|
5
5
|
import { formatElapsedTime } from "#shared";
|
|
6
|
+
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
6
7
|
const api = {
|
|
7
8
|
endpoint: "termdb/diffMeth",
|
|
8
9
|
methods: {
|
|
@@ -39,7 +40,7 @@ function init({ genomes }) {
|
|
|
39
40
|
throw new Error(
|
|
40
41
|
"Differential methylation analysis returned no data. Please verify sample selections and try again."
|
|
41
42
|
);
|
|
42
|
-
if (
|
|
43
|
+
if ("totalRows" in results.data && results.data.totalRows === 0)
|
|
43
44
|
throw new Error("No promoters passed filtering. Try relaxing group criteria or selecting more samples.");
|
|
44
45
|
res.send(results);
|
|
45
46
|
} catch (e) {
|
|
@@ -168,8 +169,9 @@ async function run_diffMeth(param, ds, term_results, term_results2) {
|
|
|
168
169
|
const time1 = Date.now();
|
|
169
170
|
const result = JSON.parse(await run_R("diffMeth.R", JSON.stringify(diffMethInput)));
|
|
170
171
|
mayLog("Time taken to run diffMeth:", formatElapsedTime(Date.now() - time1));
|
|
172
|
+
const rendered = await renderVolcano(result.promoter_data, param.volcanoRender);
|
|
171
173
|
const output = {
|
|
172
|
-
data:
|
|
174
|
+
data: rendered,
|
|
173
175
|
sample_size1,
|
|
174
176
|
sample_size2
|
|
175
177
|
};
|
package/routes/termdb.proteome.js
CHANGED
|
@@ -51,9 +51,10 @@ function init({ genomes }) {
|
|
|
51
51
|
__abortSignal: q.__abortSignal
|
|
52
52
|
});
|
|
53
53
|
const controlSampleIds = cohortData.controlSampleIds || /* @__PURE__ */ new Set();
|
|
54
|
+
const prior = assay.cohorts[cohortName].prior;
|
|
54
55
|
for (const entry of cohortData.allEntries || []) {
|
|
55
56
|
const s2v = entry.s2v;
|
|
56
|
-
const stats = getCohortStats(s2v, controlSampleIds);
|
|
57
|
+
const stats = getCohortStats(s2v, controlSampleIds, prior);
|
|
57
58
|
delete entry.s2v;
|
|
58
59
|
entry.foldChange = stats.foldChange;
|
|
59
60
|
entry.pValue = stats.pValue;
|
|
@@ -71,7 +72,7 @@ function init({ genomes }) {
|
|
|
71
72
|
}
|
|
72
73
|
};
|
|
73
74
|
}
|
|
74
|
-
function getCohortStats(allS2v, controlSampleIds) {
|
|
75
|
+
function getCohortStats(allS2v, controlSampleIds, prior) {
|
|
75
76
|
if (!allS2v || typeof allS2v != "object") return { foldChange: null, pValue: null, testedN: 0, controlN: 0 };
|
|
76
77
|
const controlValues = [];
|
|
77
78
|
const testedValues = [];
|
|
@@ -84,7 +85,10 @@ function getCohortStats(allS2v, controlSampleIds) {
|
|
|
84
85
|
const controlMean = controlValues?.length ? controlValues.reduce((sum, v) => sum + v, 0) / controlValues.length : null;
|
|
85
86
|
const testedMean = testedValues?.length ? testedValues.reduce((sum, v) => sum + v, 0) / testedValues.length : null;
|
|
86
87
|
const foldChange = testedMean != null && controlMean != null && Number.isFinite(testedMean) && Number.isFinite(controlMean) && controlMean !== 0 ? testedMean / controlMean : null;
|
|
87
|
-
|
|
88
|
+
if (!Number.isFinite(prior?.d0) || prior.d0 <= 0 || !Number.isFinite(prior?.s0sq) || prior.s0sq <= 0) {
|
|
89
|
+
throw "prior with finite positive d0 and s0sq is required for moderated t-test";
|
|
90
|
+
}
|
|
91
|
+
const pValue = getModeratedPValue(testedValues, controlValues, prior);
|
|
88
92
|
return {
|
|
89
93
|
foldChange,
|
|
90
94
|
pValue,
|
|
@@ -92,36 +96,38 @@ function getCohortStats(allS2v, controlSampleIds) {
|
|
|
92
96
|
controlN: controlValues.length
|
|
93
97
|
};
|
|
94
98
|
}
|
|
95
|
-
function
|
|
99
|
+
function getModeratedPValue(a, b, prior) {
|
|
96
100
|
const n1 = a.length;
|
|
97
101
|
const n2 = b.length;
|
|
98
102
|
if (n1 < 2 || n2 < 2) return null;
|
|
99
103
|
const mean1 = a.reduce((s, v) => s + v, 0) / n1;
|
|
100
104
|
const mean2 = b.reduce((s, v) => s + v, 0) / n2;
|
|
101
|
-
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
105
|
+
let ss1 = 0;
|
|
106
|
+
for (const v of a) {
|
|
107
|
+
const d = v - mean1;
|
|
108
|
+
ss1 += d * d;
|
|
109
|
+
}
|
|
110
|
+
let ss2 = 0;
|
|
111
|
+
for (const v of b) {
|
|
112
|
+
const d = v - mean2;
|
|
113
|
+
ss2 += d * d;
|
|
114
|
+
}
|
|
115
|
+
const dfResidual = n1 + n2 - 2;
|
|
116
|
+
const pooledVar = (ss1 + ss2) / dfResidual;
|
|
117
|
+
const { d0, s0sq } = prior;
|
|
118
|
+
const sTildeSq = (d0 * s0sq + dfResidual * pooledVar) / (d0 + dfResidual);
|
|
119
|
+
const se = Math.sqrt(sTildeSq * (1 / n1 + 1 / n2));
|
|
120
|
+
if (!(se > 0)) {
|
|
106
121
|
if (mean1 === mean2) return 1;
|
|
107
122
|
return 1e-300;
|
|
108
123
|
}
|
|
109
|
-
const t = (mean1 - mean2) /
|
|
110
|
-
const df =
|
|
124
|
+
const t = (mean1 - mean2) / se;
|
|
125
|
+
const df = d0 + dfResidual;
|
|
111
126
|
if (!Number.isFinite(df) || df < 0.1) return null;
|
|
112
127
|
const p = 2 * tCdfTail(Math.abs(t), df);
|
|
113
128
|
if (!Number.isFinite(p)) return null;
|
|
114
129
|
return Math.max(1e-300, Math.min(1, p));
|
|
115
130
|
}
|
|
116
|
-
function sampleVariance(lst, mean) {
|
|
117
|
-
if (lst.length < 2) return NaN;
|
|
118
|
-
let sumsq = 0;
|
|
119
|
-
for (const v of lst) {
|
|
120
|
-
const d = v - mean;
|
|
121
|
-
sumsq += d * d;
|
|
122
|
-
}
|
|
123
|
-
return sumsq / (lst.length - 1);
|
|
124
|
-
}
|
|
125
131
|
function tCdfTail(t, df) {
|
|
126
132
|
const x = df / (df + t * t);
|
|
127
133
|
return 0.5 * regularizedBetaIncomplete(df / 2, 0.5, x);
|
|
@@ -217,6 +223,8 @@ async function validate_query_proteome(ds) {
|
|
|
217
223
|
if (!cohort.controlFilter)
|
|
218
224
|
throw `Missing controlFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
|
|
219
225
|
if (!cohort.caseFilter) throw `Missing caseFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
|
|
226
|
+
if (!cohort.prior?.d0 || !cohort.prior?.s0sq)
|
|
227
|
+
throw `Missing prior.d0 and prior.s0sq in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
|
|
220
228
|
}
|
|
221
229
|
} else {
|
|
222
230
|
throw `Invalid assay structure for "${assayName}". Must have .cohorts`;
|
package/routes/termdb.singlecellDEgenes.js
CHANGED
|
@@ -25,7 +25,8 @@ function init({ genomes }) {
|
|
|
25
25
|
if (!ds.queries?.singleCell?.DEgenes || !ds.queries.singleCell.DEgenes.get)
|
|
26
26
|
throw new Error("DE genes not supported on this dataset.");
|
|
27
27
|
result = await ds.queries.singleCell.DEgenes.get(q);
|
|
28
|
-
|
|
28
|
+
const isEmpty = !result || !result.data || (Array.isArray(result.data) ? result.data.length === 0 : !result.data.totalRows);
|
|
29
|
+
if (isEmpty) {
|
|
29
30
|
result = {
|
|
30
31
|
status: 404,
|
|
31
32
|
error: !result ? "No data found." : "No differentially expressed genes found."
|
package/routes/termdb.singlecellSamples.js
CHANGED
|
@@ -48,11 +48,11 @@ async function validate_query_singleCell(ds, genome) {
|
|
|
48
48
|
const q = ds.queries.singleCell;
|
|
49
49
|
if (!q) return;
|
|
50
50
|
if (typeof q.samples != "object") throw new Error("singleCell.samples{} not object");
|
|
51
|
+
if (typeof q.data != "object") throw new Error("singleCell.data{} not object");
|
|
51
52
|
if (typeof q.samples.get == "function") {
|
|
52
53
|
} else {
|
|
53
|
-
await
|
|
54
|
+
await validateSamples(q, ds);
|
|
54
55
|
}
|
|
55
|
-
if (typeof q.data != "object") throw new Error("singleCell.data{} not object");
|
|
56
56
|
if (q.data.src == "gdcapi") {
|
|
57
57
|
gdc_validate_query_singleCell_data(ds, genome);
|
|
58
58
|
} else if (q.data.src == "native") {
|
|
@@ -85,7 +85,8 @@ function validateImages(images) {
|
|
|
85
85
|
if (!images.label) images.label = "Images";
|
|
86
86
|
if (!images.fileName) throw new Error("images.fileName missing");
|
|
87
87
|
}
|
|
88
|
-
async function
|
|
88
|
+
async function validateSamples(q, ds) {
|
|
89
|
+
const S = q.samples, D = q.data;
|
|
89
90
|
const samples = /* @__PURE__ */ new Map();
|
|
90
91
|
for (const plot of D.plots) {
|
|
91
92
|
for (const fn of await fs.promises.readdir(path.join(serverconfig.tpmasterdir, plot.folder))) {
|
|
@@ -102,6 +103,8 @@ async function validateSamplesNative(S, D, ds) {
|
|
|
102
103
|
}
|
|
103
104
|
if (!plot.colorColumns || plot.colorColumns.length == 0) continue;
|
|
104
105
|
}
|
|
106
|
+
if (samples.size == 0) throw new Error("no scrna samples found");
|
|
107
|
+
console.log(samples.size, "singleCell samples loaded from " + ds.label);
|
|
105
108
|
if (S.sampleColumns) {
|
|
106
109
|
for (const { termid } of S.sampleColumns) {
|
|
107
110
|
const term = ds.cohort.termdb.q.termjsonByOneid(termid);
|
|
@@ -114,7 +117,13 @@ async function validateSamplesNative(S, D, ds) {
|
|
|
114
117
|
}
|
|
115
118
|
}
|
|
116
119
|
S.get = () => {
|
|
117
|
-
|
|
120
|
+
const re = { samples: [...samples.values()] };
|
|
121
|
+
if (q.metaResults) {
|
|
122
|
+
re.metaResults = q.metaResults.map((i) => {
|
|
123
|
+
return { name: i.name };
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
return re;
|
|
118
127
|
};
|
|
119
128
|
}
|
|
120
129
|
function validateDataNative(D, ds) {
|
|
@@ -126,6 +135,28 @@ function validateDataNative(D, ds) {
|
|
|
126
135
|
}
|
|
127
136
|
const file2Lines = {};
|
|
128
137
|
D.get = async (q) => {
|
|
138
|
+
if (q.checkPlotAvailability) {
|
|
139
|
+
const plots2 = [];
|
|
140
|
+
for (const plot of D.plots) {
|
|
141
|
+
if (!q.plots.includes(plot.name)) continue;
|
|
142
|
+
const tsvfile = path.join(
|
|
143
|
+
serverconfig.tpmasterdir,
|
|
144
|
+
plot.folder,
|
|
145
|
+
(q.sample?.eID || q.sample?.sID) + (plot.fileSuffix || "")
|
|
146
|
+
);
|
|
147
|
+
try {
|
|
148
|
+
await file_is_readable(tsvfile);
|
|
149
|
+
plots2.push({
|
|
150
|
+
name: plot.name,
|
|
151
|
+
expCells: [],
|
|
152
|
+
// FIXME avoid breaking client but shouldn't be needed
|
|
153
|
+
noExpCells: []
|
|
154
|
+
});
|
|
155
|
+
} catch (_) {
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
return { plots: plots2 };
|
|
159
|
+
}
|
|
129
160
|
const plots = [];
|
|
130
161
|
let geneExpMap;
|
|
131
162
|
if (ds.queries.singleCell.geneExpression && q.gene) {
|
|
@@ -138,7 +169,7 @@ function validateDataNative(D, ds) {
|
|
|
138
169
|
const tsvfile = path.join(
|
|
139
170
|
serverconfig.tpmasterdir,
|
|
140
171
|
plot.folder,
|
|
141
|
-
(q.sample?.eID || q.sample?.sID) + plot.fileSuffix
|
|
172
|
+
(q.sample?.eID || q.sample?.sID) + (plot.fileSuffix || "")
|
|
142
173
|
);
|
|
143
174
|
if (!file2Lines[tsvfile]) {
|
|
144
175
|
await file_is_readable(tsvfile);
|