@sjcrh/proteinpaint-server 2.184.0 → 2.185.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/protected.test.js +5 -0
- package/dataset/termdb.test.js +1 -1
- package/package.json +5 -5
- package/routes/brainImagingSamples.js +15 -4
- package/routes/genesetEnrichment.js +101 -42
- package/routes/profile.radar2.js +112 -0
- package/routes/profile.radarFacility2.js +148 -0
- package/routes/saveWSIAnnotation.js +21 -0
- package/routes/termdb.DE.js +31 -238
- package/routes/termdb.chat3.js +191 -0
- package/routes/termdb.cluster.js +44 -9
- package/routes/termdb.config.js +5 -3
- package/routes/termdb.diffMeth.js +4 -2
- package/routes/termdb.proteome.js +28 -20
- package/routes/termdb.singlecellDEgenes.js +2 -1
- package/routes/termdb.singlecellSamples.js +36 -5
- package/src/app.js +3517 -2542
- package/src/serverconfig.js +16 -1
package/routes/termdb.DE.js
CHANGED
|
@@ -1,15 +1,11 @@
|
|
|
1
|
-
import fs from "fs";
|
|
2
1
|
import path from "path";
|
|
3
2
|
import { diffExpPayload } from "#types/checkers";
|
|
4
|
-
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
5
|
-
import { getData } from "../src/termdb.matrix.js";
|
|
6
|
-
import { get_ds_tdb } from "../src/termdb.js";
|
|
7
|
-
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
8
3
|
import { mayLog } from "#src/helpers.ts";
|
|
9
4
|
import serverconfig from "../src/serverconfig.js";
|
|
10
|
-
import { imageSize } from "image-size";
|
|
11
5
|
import { get_header_txt } from "#src/utils.js";
|
|
12
|
-
import {
|
|
6
|
+
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
7
|
+
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
8
|
+
import { readCacheFileOrRecompute, resolveDeContext, resolveSampleGroups } from "../src/diffAnalysis.ts";
|
|
13
9
|
const api = {
|
|
14
10
|
endpoint: "termdb/DE",
|
|
15
11
|
methods: {
|
|
@@ -27,244 +23,41 @@ function init({ genomes }) {
|
|
|
27
23
|
return async (req, res) => {
|
|
28
24
|
try {
|
|
29
25
|
const q = req.query;
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
);
|
|
44
|
-
if (term_results.error) throw new Error(term_results.error);
|
|
45
|
-
}
|
|
46
|
-
let term_results2 = [];
|
|
47
|
-
if (q.tw2) {
|
|
48
|
-
const terms2 = [q.tw2];
|
|
49
|
-
term_results2 = await getData(
|
|
50
|
-
{
|
|
51
|
-
filter: q.filter,
|
|
52
|
-
filter0: q.filter0,
|
|
53
|
-
terms: terms2
|
|
54
|
-
},
|
|
55
|
-
ds
|
|
56
|
-
);
|
|
57
|
-
if (term_results2.error) throw new Error(term_results2.error);
|
|
26
|
+
if (q.preAnalysis) {
|
|
27
|
+
const { ds, term_results, term_results2 } = await resolveDeContext(q, genomes);
|
|
28
|
+
const groups = resolveSampleGroups(q, ds, term_results, term_results2);
|
|
29
|
+
const group1Name = q.samplelst.groups[0].name;
|
|
30
|
+
const group2Name = q.samplelst.groups[1].name;
|
|
31
|
+
res.send({
|
|
32
|
+
data: {
|
|
33
|
+
[group1Name]: groups.group1names.length,
|
|
34
|
+
[group2Name]: groups.group2names.length,
|
|
35
|
+
...groups.alerts.length ? { alert: groups.alerts.join(" | ") } : {}
|
|
36
|
+
}
|
|
37
|
+
});
|
|
38
|
+
return;
|
|
58
39
|
}
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
|
|
40
|
+
const { cacheId, geneData, sample_size1, sample_size2, method, images, bcv } = await readCacheFileOrRecompute({
|
|
41
|
+
daRequest: q,
|
|
42
|
+
genomes
|
|
43
|
+
});
|
|
44
|
+
const rendered = await renderVolcano(geneData, q.volcanoRender);
|
|
45
|
+
rendered.cacheId = cacheId;
|
|
46
|
+
const output = {
|
|
47
|
+
data: rendered,
|
|
48
|
+
sample_size1,
|
|
49
|
+
sample_size2,
|
|
50
|
+
method,
|
|
51
|
+
images
|
|
52
|
+
};
|
|
53
|
+
if (bcv != null) output.bcv = bcv;
|
|
54
|
+
res.send(output);
|
|
62
55
|
} catch (e) {
|
|
63
56
|
res.send({ status: "error", error: e.message || e });
|
|
64
57
|
if (e instanceof Error && e.stack) console.log(e);
|
|
65
58
|
}
|
|
66
59
|
};
|
|
67
60
|
}
|
|
68
|
-
async function run_DE(param, ds, term_results, term_results2) {
|
|
69
|
-
if (param.samplelst?.groups?.length != 2) throw new Error(".samplelst.groups.length!=2");
|
|
70
|
-
if (param.samplelst.groups[0].values?.length < 1) throw new Error("samplelst.groups[0].values.length<1");
|
|
71
|
-
if (param.samplelst.groups[1].values?.length < 1) throw new Error("samplelst.groups[1].values.length<1");
|
|
72
|
-
const q = ds.queries.rnaseqGeneCount;
|
|
73
|
-
if (!q) return;
|
|
74
|
-
if (!q.file) throw new Error("unknown data type for rnaseqGeneCount");
|
|
75
|
-
if (!q.storage_type) throw new Error("storage_type is not defined");
|
|
76
|
-
param.storage_type = q.storage_type;
|
|
77
|
-
const group1names = [];
|
|
78
|
-
const conf1_group1 = [];
|
|
79
|
-
const conf2_group1 = [];
|
|
80
|
-
for (const s of param.samplelst.groups[0].values) {
|
|
81
|
-
if (!Number.isInteger(s.sampleId)) continue;
|
|
82
|
-
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
83
|
-
if (!n) continue;
|
|
84
|
-
if (q.allSampleSet.has(n)) {
|
|
85
|
-
if (param.tw && !param.tw2) {
|
|
86
|
-
if (term_results.samples[s.sampleId]) {
|
|
87
|
-
if (param.tw.q.mode == "continuous") {
|
|
88
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
89
|
-
} else {
|
|
90
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
91
|
-
}
|
|
92
|
-
group1names.push(n);
|
|
93
|
-
}
|
|
94
|
-
} else if (!param.tw && param.tw2) {
|
|
95
|
-
if (term_results2.samples[s.sampleId]) {
|
|
96
|
-
if (param.tw2.q.mode == "continuous") {
|
|
97
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
98
|
-
} else {
|
|
99
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
100
|
-
}
|
|
101
|
-
group1names.push(n);
|
|
102
|
-
}
|
|
103
|
-
} else if (param.tw && param.tw2) {
|
|
104
|
-
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
105
|
-
if (param.tw.q.mode == "continuous") {
|
|
106
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
107
|
-
} else {
|
|
108
|
-
conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
109
|
-
}
|
|
110
|
-
if (param.tw2.q.mode == "continuous") {
|
|
111
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
112
|
-
} else {
|
|
113
|
-
conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
114
|
-
}
|
|
115
|
-
group1names.push(n);
|
|
116
|
-
}
|
|
117
|
-
} else {
|
|
118
|
-
group1names.push(n);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
const group2names = [];
|
|
123
|
-
const conf1_group2 = [];
|
|
124
|
-
const conf2_group2 = [];
|
|
125
|
-
for (const s of param.samplelst.groups[1].values) {
|
|
126
|
-
if (!Number.isInteger(s.sampleId)) continue;
|
|
127
|
-
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
128
|
-
if (!n) continue;
|
|
129
|
-
if (q.allSampleSet.has(n)) {
|
|
130
|
-
if (param.tw && !param.tw2) {
|
|
131
|
-
if (term_results.samples[s.sampleId]) {
|
|
132
|
-
if (param.tw.q.mode == "continuous") {
|
|
133
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
134
|
-
} else {
|
|
135
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
136
|
-
}
|
|
137
|
-
group2names.push(n);
|
|
138
|
-
}
|
|
139
|
-
} else if (!param.tw && param.tw2) {
|
|
140
|
-
if (term_results2.samples[s.sampleId]) {
|
|
141
|
-
if (param.tw2.q.mode == "continuous") {
|
|
142
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
143
|
-
} else {
|
|
144
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
145
|
-
}
|
|
146
|
-
group2names.push(n);
|
|
147
|
-
}
|
|
148
|
-
} else if (param.tw && param.tw2) {
|
|
149
|
-
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
150
|
-
if (param.tw.q.mode == "continuous") {
|
|
151
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
|
|
152
|
-
} else {
|
|
153
|
-
conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
|
|
154
|
-
}
|
|
155
|
-
if (param.tw2.q.mode == "continuous") {
|
|
156
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
|
|
157
|
-
} else {
|
|
158
|
-
conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
|
|
159
|
-
}
|
|
160
|
-
group2names.push(n);
|
|
161
|
-
}
|
|
162
|
-
} else {
|
|
163
|
-
group2names.push(n);
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
const sample_size1 = group1names.length;
|
|
168
|
-
const sample_size2 = group2names.length;
|
|
169
|
-
const alerts = validateGroups(sample_size1, sample_size2, group1names, group2names);
|
|
170
|
-
if (param.preAnalysis) {
|
|
171
|
-
const group1Name = param.samplelst.groups[0].name;
|
|
172
|
-
const group2Name = param.samplelst.groups[1].name;
|
|
173
|
-
return {
|
|
174
|
-
data: {
|
|
175
|
-
[group1Name]: sample_size1,
|
|
176
|
-
[group2Name]: sample_size2,
|
|
177
|
-
...alerts.length ? { alert: alerts.join(" | ") } : {}
|
|
178
|
-
}
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
if (alerts.length) throw new Error(alerts.join(" | "));
|
|
182
|
-
const cases_string = group2names.map((i) => i).join(",");
|
|
183
|
-
const controls_string = group1names.map((i) => i).join(",");
|
|
184
|
-
const expression_input = {
|
|
185
|
-
case: cases_string,
|
|
186
|
-
control: controls_string,
|
|
187
|
-
data_type: "do_DE",
|
|
188
|
-
input_file: q.file,
|
|
189
|
-
cachedir: serverconfig.cachedir,
|
|
190
|
-
min_count: param.min_count,
|
|
191
|
-
min_total_count: param.min_total_count,
|
|
192
|
-
cpm_cutoff: param.cpm_cutoff,
|
|
193
|
-
storage_type: param.storage_type,
|
|
194
|
-
DE_method: param.method,
|
|
195
|
-
mds_cutoff: 1e4
|
|
196
|
-
// If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive. Number of genes * Number of samples < mds_cutoff for mds generation
|
|
197
|
-
};
|
|
198
|
-
if (param.tw) {
|
|
199
|
-
expression_input.conf1 = [...conf1_group2, ...conf1_group1];
|
|
200
|
-
expression_input.conf1_mode = param.tw.q.mode;
|
|
201
|
-
if (new Set(expression_input.conf1).size === 1) {
|
|
202
|
-
throw new Error("Confounding variable 1 has only one value");
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
if (param.tw2) {
|
|
206
|
-
expression_input.conf2 = [...conf2_group2, ...conf2_group1];
|
|
207
|
-
expression_input.conf2_mode = param.tw2.q.mode;
|
|
208
|
-
if (new Set(expression_input.conf2).size === 1) {
|
|
209
|
-
throw new Error("Confounding variable 2 has only one value");
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
const sample_size_limit = 8;
|
|
213
|
-
if (group1names.length <= sample_size_limit && group2names.length <= sample_size_limit || param.method == "edgeR" || param.method == "limma") {
|
|
214
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
215
|
-
const result2 = JSON.parse(await run_R("edge_newh5.R", JSON.stringify(expression_input)));
|
|
216
|
-
mayLog("Time taken to run edgeR:", formatElapsedTime(Date.now() - time12));
|
|
217
|
-
param.method = "edgeR";
|
|
218
|
-
const ql_imagePath = path.join(serverconfig.cachedir, result2.edgeR_ql_image_name[0]);
|
|
219
|
-
mayLog("ql_imagePath:", ql_imagePath);
|
|
220
|
-
await readFileAndDelete(ql_imagePath, "ql_image", result2);
|
|
221
|
-
if (result2.edgeR_mds_image_name) {
|
|
222
|
-
const mds_imagePath = path.join(serverconfig.cachedir, result2.edgeR_mds_image_name[0]);
|
|
223
|
-
mayLog("mds_imagePath:", mds_imagePath);
|
|
224
|
-
await readFileAndDelete(mds_imagePath, "mds_image", result2);
|
|
225
|
-
}
|
|
226
|
-
const images = [result2.ql_image];
|
|
227
|
-
if (result2.mds_image) images.push(result2.mds_image);
|
|
228
|
-
const output = {
|
|
229
|
-
data: result2.gene_data,
|
|
230
|
-
sample_size2: result2.num_cases[0],
|
|
231
|
-
sample_size1: result2.num_controls[0],
|
|
232
|
-
method: param.method,
|
|
233
|
-
images
|
|
234
|
-
};
|
|
235
|
-
if (result2.bcv && result2.bcv[0] !== null && result2.bcv[0] !== void 0) {
|
|
236
|
-
output.bcv = result2.bcv[0];
|
|
237
|
-
}
|
|
238
|
-
return output;
|
|
239
|
-
}
|
|
240
|
-
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
241
|
-
const result = JSON.parse(await run_rust("DEanalysis", JSON.stringify(expression_input)));
|
|
242
|
-
mayLog("Time taken to run rust DE pipeline:", formatElapsedTime(Date.now() - time1));
|
|
243
|
-
param.method = "wilcoxon";
|
|
244
|
-
return { data: result, sample_size1, sample_size2, method: param.method };
|
|
245
|
-
}
|
|
246
|
-
function validateGroups(sample_size1, sample_size2, group1names, group2names) {
|
|
247
|
-
const alerts = [];
|
|
248
|
-
if (sample_size1 < 1) alerts.push("sample size of group1 < 1");
|
|
249
|
-
if (sample_size2 < 1) alerts.push("sample size of group2 < 1");
|
|
250
|
-
const commonnames = group1names.filter((x) => group2names.includes(x));
|
|
251
|
-
if (commonnames.length) alerts.push(`Common elements found between both groups: ${commonnames.join(", ")}`);
|
|
252
|
-
return alerts;
|
|
253
|
-
}
|
|
254
|
-
async function readFileAndDelete(file, key, response) {
|
|
255
|
-
const plot = await fs.promises.readFile(file);
|
|
256
|
-
const plotBuffer = Buffer.from(plot).toString("base64");
|
|
257
|
-
const { width, height } = imageSize(file);
|
|
258
|
-
const obj = {
|
|
259
|
-
src: `data:image/png;base64,${plotBuffer}`,
|
|
260
|
-
size: `${width}x${height}`,
|
|
261
|
-
key
|
|
262
|
-
};
|
|
263
|
-
response[key] = obj;
|
|
264
|
-
fs.unlink(file, (err) => {
|
|
265
|
-
if (err) throw new Error(err.message || String(err));
|
|
266
|
-
});
|
|
267
|
-
}
|
|
268
61
|
async function validate_query_rnaseqGeneCount(ds) {
|
|
269
62
|
const q = ds.queries.rnaseqGeneCount;
|
|
270
63
|
if (!q) return;
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { ChatPayload } from "#types/checkers";
|
|
2
|
+
import { mayLog } from "#src/helpers.ts";
|
|
3
|
+
import { formatElapsedTime } from "#shared";
|
|
4
|
+
import { readJSONFile, parse_geneset_db } from "./chat/utils.ts";
|
|
5
|
+
import { classifyQuery } from "./chat/classify1.ts";
|
|
6
|
+
import { classifyPlotType } from "./chat/plot.ts";
|
|
7
|
+
import { classifyNotPlot } from "./chat/classify2.ts";
|
|
8
|
+
import { inferScaffold } from "./chat/scaffold.ts";
|
|
9
|
+
import serverconfig from "../src/serverconfig.js";
|
|
10
|
+
import { getDsAllowedTermTypes } from "./termdb.config.ts";
|
|
11
|
+
import { phrase2entity } from "./chat/phrase2entity.ts";
|
|
12
|
+
import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
|
|
13
|
+
import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
|
|
14
|
+
import path from "path";
|
|
15
|
+
import fs from "fs";
|
|
16
|
+
import { resolveToPlotState } from "./chat/scaffold2state.ts";
|
|
17
|
+
const api = {
|
|
18
|
+
endpoint: "termdb/chat3",
|
|
19
|
+
methods: {
|
|
20
|
+
get: {
|
|
21
|
+
...ChatPayload,
|
|
22
|
+
init
|
|
23
|
+
},
|
|
24
|
+
post: {
|
|
25
|
+
...ChatPayload,
|
|
26
|
+
init
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
};
|
|
30
|
+
/**
 * Creates the request handler for the "termdb/chat3" endpoint.
 *
 * The handler validates the genome/dataset named in the query, lists the
 * dataset's AI agent JSON files, validates the configured LLM provider,
 * derives the supported chart types for the cohort named in the filter
 * (if any) and delegates the prompt to `run_chat_pipeline`.
 *
 * Any failure is reported to the client as `{ error }`; nothing is rethrown.
 *
 * @param {object} arg
 * @param {object} arg.genomes - genome objects keyed by genome name
 * @returns {(req: object, res: object) => Promise<void>} route handler
 */
function init({ genomes }) {
  const supportedProviders = ["SJ", "ollama", "huggingface", "azure"];
  return async (req, res) => {
    const q = req.query;
    try {
      const g = genomes[q.genome];
      if (!g) throw "invalid genome";
      const ds = g.datasets?.[q.dslabel];
      if (!ds) throw "invalid dslabel";
      const aiFilesDir = serverconfig.binpath + "/../../dataset/ai/" + q.dslabel;
      let agentFiles = [];
      try {
        // Use the promise API: the previous `await fs.readdirSync(...)` awaited a
        // synchronous call and blocked the event loop while listing the directory.
        const entries = await fs.promises.readdir(aiFilesDir);
        agentFiles = entries.filter((file) => file.endsWith(".json"));
      } catch (err) {
        if (err.code === "ENOENT") throw new Error(`Directory not found: ${aiFilesDir}`);
        if (err.code === "ENOTDIR") throw new Error(`Path is not a directory: ${aiFilesDir}`);
        throw err;
      }
      const llm = serverconfig.llm;
      if (!llm) throw "serverconfig.llm is not configured";
      if (!supportedProviders.includes(llm.provider)) {
        throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
      }
      // q.filter may arrive as a JSON string (GET) or as an object (POST)
      const rawFilter = typeof q.filter === "string" ? JSON.parse(q.filter) : q.filter;
      const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
      const lst = Array.isArray(filter.lst) ? filter.lst : [];
      const cohortFilter = lst.find((item) => item?.tag === "cohortFilter");
      // A cohort filter without tvs values falls back to the "" (no cohort) key
      // instead of raising a TypeError.
      const cohortKey = cohortFilter?.tvs?.values?.[0]?.key ?? "";
      const supportedChartTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
      const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
      const _allowedTermTypes = getDsAllowedTermTypes(ds);
      const ai_output_json = await run_chat_pipeline(
        q.prompt,
        llm,
        ds,
        genedb,
        agentFiles,
        aiFilesDir,
        supportedChartTypes,
        _allowedTermTypes
      );
      mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
      res.send(ai_output_json);
    } catch (e) {
      // `e` may be a string (see the string throws above) or even nullish
      if (e?.stack) mayLog(e.stack);
      res.send({ error: e?.message || e });
    }
  };
}
|
|
79
|
+
/**
 * Returns the chart-type name that must be present in the cohort's supported
 * chart types for a classified plot type: "summary" requires "dictionary",
 * "dge" requires "DA", every other plot type is looked up under its own name.
 */
function chatRequiredChartType(plotType) {
  if (plotType === "summary") return "dictionary";
  if (plotType === "dge") return "DA";
  return plotType;
}

/**
 * Runs the multi-phase chat pipeline for one user prompt.
 *
 * Phases: classify the prompt (plot vs. not plot); for plots, classify the
 * plot type, check it against the supported chart types, then infer
 * scaffold -> entities -> term objects -> tw/tvs objects -> plot state.
 *
 * @param {string} user_prompt - text entered by the user
 * @param {object} llm - LLM configuration passed through to the agents
 * @param {object} ds - dataset object; `ds.label` and `ds.cohort.db.file` are read
 * @param {string} genedb - path of the gene database
 * @param {string[]} agentFiles - agent JSON file names found in `aiFilesDir`
 * @param {string} aiFilesDir - directory that must contain "main.json"
 * @param {string[]|undefined} supportedChartTypes - chart types enabled for the cohort
 * @param {*} _allowedTermTypes - currently unused
 * @returns {Promise<object>} a response object; text responses have the shape
 *   `{ type: "text", text }`, other shapes come from the agents
 * @throws {string} when "main.json" is missing or the scaffold agent returns nothing
 */
async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedChartTypes, _allowedTermTypes) {
  const mainFile = path.join(aiFilesDir, "main.json");
  if (!fs.existsSync(mainFile)) throw "Main data file is not specified for dataset:" + ds.label;
  const dataset_json = await readJSONFile(mainFile);

  const time1 = Date.now();
  const class_response = await classifyQuery(user_prompt, llm);
  mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));

  if (class_response.type == "notplot") {
    const time2 = Date.now();
    const notPlotResult = await classifyNotPlot(user_prompt, llm, agentFiles, aiFilesDir);
    mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
    if (notPlotResult.type == "html") return notPlotResult;
    return {
      type: "text",
      text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
    };
  }

  if (class_response.type != "plot") {
    // Previously this path returned undefined, which the route then passed to
    // res.send(); reply with an explicit text message instead.
    return {
      type: "text",
      text: "Unable to determine how to answer your query. Please try rephrasing your question."
    };
  }

  // Check the chart-type list before spending an LLM round trip on plot-type
  // classification whose result would be discarded anyway.
  if (!supportedChartTypes) {
    const errorMsg = "Supported chart types list is undefined. Please check the dataset configuration and ensure that getSupportedChartTypes is implemented correctly. Skipping chart type validation, but this may lead to unsupported chart type errors downstream.";
    console.warn(errorMsg);
    return { type: "text", text: errorMsg };
  }

  let time = Date.now();
  const plotType = await classifyPlotType(user_prompt, llm);
  mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));

  mayLog(`Supported chart types for this cohort: ${supportedChartTypes}`);
  if (!supportedChartTypes.includes(chatRequiredChartType(plotType))) {
    const log = 'Plot type: "' + plotType + '" is not supported.';
    mayLog(log);
    return { type: "text", text: log };
  }

  mayLog("####### First phase: Infer Plot Scaffolds #######");
  time = Date.now();
  const scaffoldResult = await inferScaffold(user_prompt, plotType, llm);
  mayLog("ScaffoldResult: ", scaffoldResult);
  mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
  if (!scaffoldResult)
    throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
  // only summary scaffolds carry a sub chart type
  const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;

  mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
  const genes_list = await parse_geneset_db(genedb);
  time = Date.now();
  const phrase2entityResult = await phrase2entity(scaffoldResult, plotType, llm, genes_list, dataset_json, ds);
  mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
  // a text result from an agent is a message for the user; stop here
  if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
    return phrase2entityResult;
  }
  mayLog(phrase2entityResult);

  mayLog("####### Third phase: From Entities infer Term Objects #######");
  const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
  time = Date.now();
  const termObj = await inferTermObjFromEntity(
    phrase2entityResult,
    plotType,
    llm,
    dataset_db,
    genes_list
  );
  mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
  mayLog("Inferred termObj from entity:", JSON.stringify(termObj));

  mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
  time = Date.now();
  const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db);
  mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
  if ("type" in twTvsObj && twTvsObj.type === "text") {
    return twTvsObj;
  }
  mayLog("twTvsObj:", twTvsObj);

  mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
  time = Date.now();
  const ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
  mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
  return ai_output_json;
}
|
|
188
|
+
export {
|
|
189
|
+
api,
|
|
190
|
+
run_chat_pipeline
|
|
191
|
+
};
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -7,8 +7,15 @@ import serverconfig from "#src/serverconfig.js";
|
|
|
7
7
|
import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
|
|
8
8
|
import { mayLimitSamples } from "#src/mds3.filter.js";
|
|
9
9
|
import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
10
|
+
import { getData } from "#src/termdb.matrix.js";
|
|
11
|
+
import {
|
|
12
|
+
GENE_EXPRESSION,
|
|
13
|
+
METABOLITE_INTENSITY,
|
|
14
|
+
NUMERIC_DICTIONARY_TERM,
|
|
15
|
+
termType2label,
|
|
16
|
+
ISOFORM_EXPRESSION,
|
|
17
|
+
PROTEOME_ABUNDANCE
|
|
18
|
+
} from "#shared/terms.js";
|
|
12
19
|
import { formatElapsedTime } from "#shared/time.js";
|
|
13
20
|
const api = {
|
|
14
21
|
endpoint: "termdb/cluster",
|
|
@@ -34,8 +41,9 @@ function init({ genomes }) {
|
|
|
34
41
|
if (!ds) throw "invalid dataset name";
|
|
35
42
|
if (ds.label === "GDC" && !ds.__gdc?.doneCaching)
|
|
36
43
|
throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
|
|
37
|
-
if ([
|
|
38
|
-
if (!ds.queries?.[q.dataType]
|
|
44
|
+
if ([GENE_EXPRESSION, ISOFORM_EXPRESSION, METABOLITE_INTENSITY, NUMERIC_DICTIONARY_TERM].includes(q.dataType)) {
|
|
45
|
+
if (!ds.queries?.[q.dataType] && q.dataType !== NUMERIC_DICTIONARY_TERM)
|
|
46
|
+
throw `no ${q.dataType} data on this dataset`;
|
|
39
47
|
if (!q.terms) throw `missing gene list`;
|
|
40
48
|
if (!Array.isArray(q.terms)) throw `gene list is not an array`;
|
|
41
49
|
if (q.terms.length < 3)
|
|
@@ -43,7 +51,7 @@ function init({ genomes }) {
|
|
|
43
51
|
result = await getResult(q, ds);
|
|
44
52
|
} else if (PROTEOME_ABUNDANCE == q.dataType) {
|
|
45
53
|
const proteomeQuery = ds.queries?.proteome;
|
|
46
|
-
if (!proteomeQuery?.get) throw `no ${
|
|
54
|
+
if (!proteomeQuery?.get) throw `no ${PROTEOME_ABUNDANCE} data getter on this dataset`;
|
|
47
55
|
if (!q.terms) throw `missing gene list`;
|
|
48
56
|
if (!Array.isArray(q.terms)) throw `gene list is not an array`;
|
|
49
57
|
if (q.terms.length < 3)
|
|
@@ -64,13 +72,16 @@ function init({ genomes }) {
|
|
|
64
72
|
}
|
|
65
73
|
async function getResult(q, ds) {
|
|
66
74
|
let _q = q;
|
|
67
|
-
if (q.dataType ==
|
|
75
|
+
if (q.dataType == GENE_EXPRESSION) {
|
|
68
76
|
_q = JSON.parse(JSON.stringify(q));
|
|
69
77
|
_q.forClusteringAnalysis = true;
|
|
70
78
|
_q.__abortSignal = q.__abortSignal;
|
|
71
79
|
}
|
|
72
80
|
let term2sample2value, byTermId, bySampleId, skippedSexChrGenes;
|
|
73
|
-
if (q.dataType ==
|
|
81
|
+
if (q.dataType == NUMERIC_DICTIONARY_TERM) {
|
|
82
|
+
;
|
|
83
|
+
({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
|
|
84
|
+
} else if (q.dataType == PROTEOME_ABUNDANCE) {
|
|
74
85
|
;
|
|
75
86
|
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get(_q));
|
|
76
87
|
} else {
|
|
@@ -90,7 +101,7 @@ async function getResult(q, ds) {
|
|
|
90
101
|
const removedHierClusterTerms = [];
|
|
91
102
|
if (noValueTerms.length) {
|
|
92
103
|
removedHierClusterTerms.push({
|
|
93
|
-
text: `Skipped ${q.dataType ==
|
|
104
|
+
text: `Skipped ${q.dataType == GENE_EXPRESSION ? "genes" : "items"} with no data`,
|
|
94
105
|
lst: noValueTerms
|
|
95
106
|
});
|
|
96
107
|
}
|
|
@@ -100,7 +111,7 @@ async function getResult(q, ds) {
|
|
|
100
111
|
if (term2sample2value.size == 0) throw "no data";
|
|
101
112
|
if (term2sample2value.size == 1) {
|
|
102
113
|
const g = Array.from(term2sample2value.keys())[0];
|
|
103
|
-
return { term: { gene: g, type:
|
|
114
|
+
return { term: { gene: g, type: GENE_EXPRESSION }, data: term2sample2value.get(g) };
|
|
104
115
|
}
|
|
105
116
|
const t = Date.now();
|
|
106
117
|
const clustering = await doClustering(term2sample2value, q, Object.keys(bySampleId).length);
|
|
@@ -109,6 +120,29 @@ async function getResult(q, ds) {
|
|
|
109
120
|
if (removedHierClusterTerms.length) result.removedHierClusterTerms = removedHierClusterTerms;
|
|
110
121
|
return result;
|
|
111
122
|
}
|
|
123
|
+
/**
 * Fetches annotations for the requested dictionary terms through getData()
 * and pivots them from sample -> term -> annotation into the
 * term -> sample -> value shape used by the clustering code.
 *
 * @param {object} q - request; reads `terms`, `filter`, `filter0`, `__protected__`
 * @param {object} ds - dataset object, passed through to getData()
 * @returns {Promise<{term2sample2value: Map<string, object>, byTermId: object, bySampleId: object}>}
 * @throws rethrows `data.error` as returned by getData()
 */
async function getNumericDictTermAnnotation(q, ds) {
  const getDataArgs = {
    // TODO: figure out when term is not a termwrapper
    // bare term objects are wrapped into a continuous-mode termwrapper
    terms: q.terms.map((tw) => (tw.term ? tw : { term: tw, q: { mode: "continuous" } })),
    filter: q.filter,
    filter0: q.filter0,
    __protected__: q.__protected__
  };
  const data = await getData(getDataArgs, ds);
  if (data.error) throw data.error;

  const term2sample2value = /* @__PURE__ */ new Map();
  for (const [sampleId, sampleData] of Object.entries(data.samples)) {
    for (const [termId, annotation] of Object.entries(sampleData)) {
      // "sample" is a bookkeeping key on each sample entry, not a term
      if (termId === "sample") continue;
      let sample2value = term2sample2value.get(termId);
      if (!sample2value) {
        sample2value = {};
        term2sample2value.set(termId, sample2value);
      }
      // Only finite numbers are recorded, matching the Number.isFinite filter
      // applied to native values elsewhere in this file; a missing annotation
      // or a NaN/undefined value is skipped instead of entering the matrix.
      const value = annotation?.value;
      if (!Number.isFinite(value)) continue;
      sample2value[sampleId] = value;
    }
  }
  return { term2sample2value, byTermId: data.refs.byTermId, bySampleId: data.refs.bySampleId };
}
|
|
112
146
|
async function doClustering(data, q, numCases = 1e3) {
|
|
113
147
|
const sampleSet = /* @__PURE__ */ new Set();
|
|
114
148
|
let firstTerm = true;
|
|
@@ -297,6 +331,7 @@ async function validateNative(q, ds) {
|
|
|
297
331
|
const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
|
|
298
332
|
if (!sampleId) continue;
|
|
299
333
|
if (limitSamples && !limitSamples.has(sampleId)) continue;
|
|
334
|
+
if (!Number.isFinite(samplesData[sampleName])) continue;
|
|
300
335
|
s2v[sampleId] = samplesData[sampleName];
|
|
301
336
|
}
|
|
302
337
|
if (Object.keys(s2v).length) {
|
package/routes/termdb.config.js
CHANGED
|
@@ -47,11 +47,12 @@ function make(q, req, res, ds, genome) {
|
|
|
47
47
|
selectCohort: getSelectCohort(ds, req),
|
|
48
48
|
supportedChartTypes: tdb.q?.getSupportedChartTypes(req),
|
|
49
49
|
renamedChartTypes: ds.cohort.renamedChartTypes,
|
|
50
|
-
allowedTermTypes:
|
|
50
|
+
allowedTermTypes: getDsAllowedTermTypes(ds),
|
|
51
51
|
massSessionDuration: serverconfig.features.massSessionDuration || 30,
|
|
52
52
|
dataDownloadCatch: tdb.dataDownloadCatch,
|
|
53
53
|
matrix: tdb.matrix,
|
|
54
54
|
hierCluster: tdb.hierCluster,
|
|
55
|
+
numericDictTermCluster: tdb.numericDictTermCluster,
|
|
55
56
|
mclass: tdb.mclass,
|
|
56
57
|
alwaysRefillCategoricalTermValues: tdb.alwaysRefillCategoricalTermValues,
|
|
57
58
|
isGeneSetTermdb: tdb.isGeneSetTermdb,
|
|
@@ -293,7 +294,7 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
293
294
|
q2.images = {};
|
|
294
295
|
}
|
|
295
296
|
}
|
|
296
|
-
function
|
|
297
|
+
function getDsAllowedTermTypes(ds) {
|
|
297
298
|
const typeSet = /* @__PURE__ */ new Set();
|
|
298
299
|
for (const r of ds.cohort.termdb.termtypeByCohort) {
|
|
299
300
|
if (r.termType) typeSet.add(r.termType);
|
|
@@ -329,5 +330,6 @@ function getSelectCohort(ds, req) {
|
|
|
329
330
|
return copy;
|
|
330
331
|
}
|
|
331
332
|
export {
|
|
332
|
-
api
|
|
333
|
+
api,
|
|
334
|
+
getDsAllowedTermTypes
|
|
333
335
|
};
|
|
@@ -3,6 +3,7 @@ import { getData } from "../src/termdb.matrix.js";
|
|
|
3
3
|
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
4
4
|
import { mayLog } from "#src/helpers.ts";
|
|
5
5
|
import { formatElapsedTime } from "#shared";
|
|
6
|
+
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
6
7
|
const api = {
|
|
7
8
|
endpoint: "termdb/diffMeth",
|
|
8
9
|
methods: {
|
|
@@ -39,7 +40,7 @@ function init({ genomes }) {
|
|
|
39
40
|
throw new Error(
|
|
40
41
|
"Differential methylation analysis returned no data. Please verify sample selections and try again."
|
|
41
42
|
);
|
|
42
|
-
if (
|
|
43
|
+
if ("totalRows" in results.data && results.data.totalRows === 0)
|
|
43
44
|
throw new Error("No promoters passed filtering. Try relaxing group criteria or selecting more samples.");
|
|
44
45
|
res.send(results);
|
|
45
46
|
} catch (e) {
|
|
@@ -168,8 +169,9 @@ async function run_diffMeth(param, ds, term_results, term_results2) {
|
|
|
168
169
|
const time1 = Date.now();
|
|
169
170
|
const result = JSON.parse(await run_R("diffMeth.R", JSON.stringify(diffMethInput)));
|
|
170
171
|
mayLog("Time taken to run diffMeth:", formatElapsedTime(Date.now() - time1));
|
|
172
|
+
const rendered = await renderVolcano(result.promoter_data, param.volcanoRender);
|
|
171
173
|
const output = {
|
|
172
|
-
data:
|
|
174
|
+
data: rendered,
|
|
173
175
|
sample_size1,
|
|
174
176
|
sample_size2
|
|
175
177
|
};
|