@sjcrh/proteinpaint-server 2.186.0 → 2.188.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +3 -3
- package/package.json +6 -6
- package/routes/aiProjectAdmin.js +2 -0
- package/routes/aiProjectSelectedWSImages.js +83 -43
- package/routes/brainImaging.js +1 -1
- package/routes/deleteWSITileSelection.js +48 -15
- package/routes/genesetEnrichment.js +44 -5
- package/routes/profile.forms2.js +107 -0
- package/routes/saveWSIAnnotation.js +75 -34
- package/routes/termdb.DE.js +11 -13
- package/routes/termdb.chat3.js +77 -59
- package/routes/termdb.cluster.js +4 -1
- package/routes/termdb.config.js +6 -0
- package/routes/termdb.dapVolcano.js +80 -0
- package/routes/termdb.diffMeth.js +26 -164
- package/routes/termdb.proteome.js +15 -7
- package/routes/termdb.sampleScatter.js +4 -0
- package/routes/termdb.singlecellSamples.js +11 -0
- package/routes/termdb.topVariablyExpressedGenes.js +16 -142
- package/src/app.js +3900 -1892
package/routes/termdb.DE.js
CHANGED
|
@@ -5,7 +5,7 @@ import serverconfig from "../src/serverconfig.js";
|
|
|
5
5
|
import { get_header_txt } from "#src/utils.js";
|
|
6
6
|
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
7
7
|
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
8
|
-
import { readCacheFileOrRecompute,
|
|
8
|
+
import { readCacheFileOrRecompute, resolveDaContext, resolveSampleGroups } from "../src/diffAnalysis.ts";
|
|
9
9
|
const api = {
|
|
10
10
|
endpoint: "termdb/DE",
|
|
11
11
|
methods: {
|
|
@@ -24,7 +24,7 @@ function init({ genomes }) {
|
|
|
24
24
|
try {
|
|
25
25
|
const q = req.query;
|
|
26
26
|
if (q.preAnalysis) {
|
|
27
|
-
const { ds, term_results, term_results2 } = await
|
|
27
|
+
const { ds, term_results, term_results2 } = await resolveDaContext(q, genomes);
|
|
28
28
|
const groups = resolveSampleGroups(q, ds, term_results, term_results2);
|
|
29
29
|
const group1Name = q.samplelst.groups[0].name;
|
|
30
30
|
const group2Name = q.samplelst.groups[1].name;
|
|
@@ -37,20 +37,18 @@ function init({ genomes }) {
|
|
|
37
37
|
});
|
|
38
38
|
return;
|
|
39
39
|
}
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
const rendered = await renderVolcano(geneData, q.volcanoRender);
|
|
45
|
-
rendered.cacheId = cacheId;
|
|
40
|
+
const result = await readCacheFileOrRecompute({ daRequest: q, genomes });
|
|
41
|
+
if (result.kind !== "DE") throw new Error("expected DE result from readCacheFileOrRecompute");
|
|
42
|
+
const rendered = await renderVolcano(result.geneData, q.volcanoRender);
|
|
43
|
+
rendered.cacheId = result.cacheId;
|
|
46
44
|
const output = {
|
|
47
45
|
data: rendered,
|
|
48
|
-
sample_size1,
|
|
49
|
-
sample_size2,
|
|
50
|
-
method,
|
|
51
|
-
images
|
|
46
|
+
sample_size1: result.sample_size1,
|
|
47
|
+
sample_size2: result.sample_size2,
|
|
48
|
+
method: result.method,
|
|
49
|
+
images: result.images
|
|
52
50
|
};
|
|
53
|
-
if (bcv != null) output.bcv = bcv;
|
|
51
|
+
if (result.bcv != null) output.bcv = result.bcv;
|
|
54
52
|
res.send(output);
|
|
55
53
|
} catch (e) {
|
|
56
54
|
res.send({ status: "error", error: e.message || e });
|
package/routes/termdb.chat3.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { ChatPayload } from "#types/checkers";
|
|
2
2
|
import { mayLog } from "#src/helpers.ts";
|
|
3
3
|
import { formatElapsedTime } from "#shared";
|
|
4
|
-
import { readJSONFile, parse_geneset_db } from "./chat/utils.ts";
|
|
4
|
+
import { readJSONFile, parse_geneset_db, getChatRelatedPlotTypes } from "./chat/utils.ts";
|
|
5
5
|
import { classifyQuery } from "./chat/classify1.ts";
|
|
6
6
|
import { classifyPlotType } from "./chat/plot.ts";
|
|
7
7
|
import { classifyNotPlot } from "./chat/classify2.ts";
|
|
@@ -11,6 +11,7 @@ import { getDsAllowedTermTypes } from "./termdb.config.ts";
|
|
|
11
11
|
import { phrase2entity } from "./chat/phrase2entity.ts";
|
|
12
12
|
import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
|
|
13
13
|
import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
|
|
14
|
+
import { answerDataQueries } from "./chat/dataQueries.ts";
|
|
14
15
|
import path from "path";
|
|
15
16
|
import fs from "fs";
|
|
16
17
|
import { resolveToPlotState } from "./chat/scaffold2state.ts";
|
|
@@ -31,9 +32,9 @@ function init({ genomes }) {
|
|
|
31
32
|
return async (req, res) => {
|
|
32
33
|
const q = req.query;
|
|
33
34
|
try {
|
|
34
|
-
const
|
|
35
|
-
if (!
|
|
36
|
-
const ds =
|
|
35
|
+
const genome = genomes[q.genome];
|
|
36
|
+
if (!genome) throw "invalid genome";
|
|
37
|
+
const ds = genome.datasets?.[q.dslabel];
|
|
37
38
|
if (!ds) throw "invalid dslabel";
|
|
38
39
|
const aiFilesDir = serverconfig.binpath + "/../../dataset/ai/" + q.dslabel;
|
|
39
40
|
let agentFiles = [];
|
|
@@ -49,14 +50,24 @@ function init({ genomes }) {
|
|
|
49
50
|
if (llm.provider !== "SJ" && llm.provider !== "ollama" && llm.provider !== "huggingface" && llm.provider !== "azure") {
|
|
50
51
|
throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
|
|
51
52
|
}
|
|
52
|
-
|
|
53
|
+
let rawFilter;
|
|
54
|
+
if (typeof q.filter === "string") {
|
|
55
|
+
try {
|
|
56
|
+
rawFilter = JSON.parse(q.filter);
|
|
57
|
+
} catch (e) {
|
|
58
|
+
throw new Error("Failed to parse filter JSON string: " + e);
|
|
59
|
+
}
|
|
60
|
+
} else {
|
|
61
|
+
rawFilter = q.filter;
|
|
62
|
+
}
|
|
53
63
|
const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
|
|
54
64
|
const lst = Array.isArray(filter.lst) ? filter.lst : [];
|
|
55
65
|
const cohortFilter = lst.find((item) => item.tag === "cohortFilter");
|
|
56
66
|
const cohortKey = cohortFilter ? cohortFilter.tvs.values[0].key : "";
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
-
const
|
|
67
|
+
const supportedPlotTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
|
|
68
|
+
const chatSupportedPlotTypes = getChatRelatedPlotTypes(supportedPlotTypes);
|
|
69
|
+
const genedb = serverconfig.tpmasterdir + "/" + genome.genedb.dbfile;
|
|
70
|
+
const allowedTermTypes = getDsAllowedTermTypes(ds);
|
|
60
71
|
const ai_output_json = await run_chat_pipeline(
|
|
61
72
|
q.prompt,
|
|
62
73
|
llm,
|
|
@@ -64,8 +75,9 @@ function init({ genomes }) {
|
|
|
64
75
|
genedb,
|
|
65
76
|
agentFiles,
|
|
66
77
|
aiFilesDir,
|
|
67
|
-
|
|
68
|
-
|
|
78
|
+
chatSupportedPlotTypes,
|
|
79
|
+
allowedTermTypes,
|
|
80
|
+
genome
|
|
69
81
|
// testing
|
|
70
82
|
);
|
|
71
83
|
mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
|
|
@@ -76,19 +88,19 @@ function init({ genomes }) {
|
|
|
76
88
|
}
|
|
77
89
|
};
|
|
78
90
|
}
|
|
79
|
-
async function run_chat_pipeline(
|
|
91
|
+
async function run_chat_pipeline(userPrompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedPlotTypes, allowedTermTypes, genome) {
|
|
80
92
|
if (!fs.existsSync(path.join(aiFilesDir, "main.json")))
|
|
81
93
|
throw "Main data file is not specified for dataset:" + ds.label;
|
|
82
94
|
const dataset_json = await readJSONFile(path.join(aiFilesDir, "main.json"));
|
|
83
95
|
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
84
|
-
const class_response = await classifyQuery(
|
|
96
|
+
const class_response = await classifyQuery(userPrompt, llm);
|
|
85
97
|
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
86
98
|
let ai_output_json;
|
|
87
|
-
if (class_response.type
|
|
99
|
+
if (class_response.type === "notplot") {
|
|
88
100
|
const time2 = (/* @__PURE__ */ new Date()).valueOf();
|
|
89
|
-
const notPlotResult = await classifyNotPlot(
|
|
101
|
+
const notPlotResult = await classifyNotPlot(userPrompt, llm, agentFiles, aiFilesDir);
|
|
90
102
|
mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
|
|
91
|
-
if (notPlotResult.type
|
|
103
|
+
if (notPlotResult.type === "html") {
|
|
92
104
|
ai_output_json = notPlotResult;
|
|
93
105
|
} else {
|
|
94
106
|
ai_output_json = {
|
|
@@ -96,70 +108,72 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
|
|
|
96
108
|
text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
|
|
97
109
|
};
|
|
98
110
|
}
|
|
99
|
-
} else if (class_response.type
|
|
111
|
+
} else if (class_response.type === "binaryQuery") {
|
|
112
|
+
const answer = await answerDataQueries(userPrompt, llm, allowedTermTypes);
|
|
113
|
+
if (!answer) throw "Couldn't decide if this is data related query!";
|
|
114
|
+
mayLog("Data Binary Query: ", answer);
|
|
115
|
+
ai_output_json = answer;
|
|
116
|
+
} else if (class_response.type === "plot") {
|
|
100
117
|
let time = (/* @__PURE__ */ new Date()).valueOf();
|
|
101
|
-
const plotType = await classifyPlotType(
|
|
118
|
+
const plotType = await classifyPlotType(userPrompt, llm);
|
|
102
119
|
mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));
|
|
103
|
-
if (!
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
const errorResponse = {
|
|
120
|
+
if (!supportedPlotTypes.includes(plotType)) {
|
|
121
|
+
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
122
|
+
ai_output_json = {
|
|
107
123
|
type: "text",
|
|
108
|
-
text:
|
|
124
|
+
text: log
|
|
109
125
|
};
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
if (plotType === "summary") {
|
|
113
|
-
if (!supportedChartTypes.includes("dictionary")) {
|
|
114
|
-
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
115
|
-
ai_output_json = {
|
|
116
|
-
type: "text",
|
|
117
|
-
text: log
|
|
118
|
-
};
|
|
119
|
-
mayLog(log);
|
|
120
|
-
return ai_output_json;
|
|
121
|
-
}
|
|
122
|
-
} else if (plotType === "dge") {
|
|
123
|
-
if (!supportedChartTypes.includes("DA")) {
|
|
124
|
-
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
125
|
-
ai_output_json = {
|
|
126
|
-
type: "text",
|
|
127
|
-
text: log
|
|
128
|
-
};
|
|
129
|
-
mayLog(log);
|
|
130
|
-
return ai_output_json;
|
|
131
|
-
}
|
|
132
|
-
} else {
|
|
133
|
-
mayLog(`Supported chart types for this cohort: ${supportedChartTypes}`);
|
|
134
|
-
if (!supportedChartTypes.includes(plotType)) {
|
|
135
|
-
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
136
|
-
ai_output_json = {
|
|
137
|
-
type: "text",
|
|
138
|
-
text: log
|
|
139
|
-
};
|
|
140
|
-
mayLog(log);
|
|
141
|
-
return ai_output_json;
|
|
142
|
-
}
|
|
126
|
+
mayLog(log);
|
|
127
|
+
return ai_output_json;
|
|
143
128
|
}
|
|
129
|
+
const genes_list = await parse_geneset_db(genedb);
|
|
130
|
+
mayLog("#################################################");
|
|
144
131
|
mayLog("####### First phase: Infer Plot Scaffolds #######");
|
|
132
|
+
mayLog("#################################################");
|
|
145
133
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
146
|
-
const
|
|
134
|
+
const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
|
|
135
|
+
const scaffoldResult = await inferScaffold(
|
|
136
|
+
userPrompt,
|
|
137
|
+
plotType,
|
|
138
|
+
llm,
|
|
139
|
+
genome,
|
|
140
|
+
genes_list,
|
|
141
|
+
allowedTermTypes,
|
|
142
|
+
dataset_json,
|
|
143
|
+
ds,
|
|
144
|
+
dataset_db
|
|
145
|
+
);
|
|
147
146
|
mayLog("ScaffoldResult: ", scaffoldResult);
|
|
147
|
+
if (plotType === "hiercluster" && "plot" in scaffoldResult && scaffoldResult.type === "plot" || "text" in scaffoldResult && scaffoldResult.type === "text") {
|
|
148
|
+
return scaffoldResult;
|
|
149
|
+
}
|
|
148
150
|
mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
|
|
149
151
|
if (!scaffoldResult)
|
|
150
152
|
throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
|
|
153
|
+
if ("type" in scaffoldResult && scaffoldResult.type === "text") {
|
|
154
|
+
return scaffoldResult;
|
|
155
|
+
}
|
|
151
156
|
const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;
|
|
157
|
+
mayLog("#################################################");
|
|
152
158
|
mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
|
|
153
|
-
|
|
159
|
+
mayLog("#################################################");
|
|
154
160
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
155
|
-
const phrase2entityResult = await phrase2entity(
|
|
161
|
+
const phrase2entityResult = await phrase2entity(
|
|
162
|
+
scaffoldResult,
|
|
163
|
+
plotType,
|
|
164
|
+
llm,
|
|
165
|
+
genes_list,
|
|
166
|
+
dataset_json,
|
|
167
|
+
ds
|
|
168
|
+
);
|
|
156
169
|
mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
|
|
157
170
|
if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
|
|
158
171
|
return phrase2entityResult;
|
|
159
172
|
}
|
|
160
173
|
mayLog(phrase2entityResult);
|
|
174
|
+
mayLog("#################################################");
|
|
161
175
|
mayLog("####### Third phase: From Entities infer Term Objects #######");
|
|
162
|
-
|
|
176
|
+
mayLog("#################################################");
|
|
163
177
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
164
178
|
const termObj = await inferTermObjFromEntity(
|
|
165
179
|
phrase2entityResult,
|
|
@@ -170,7 +184,9 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
|
|
|
170
184
|
);
|
|
171
185
|
mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
|
|
172
186
|
mayLog("Inferred termObj from entity:", JSON.stringify(termObj));
|
|
187
|
+
mayLog("#################################################");
|
|
173
188
|
mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
|
|
189
|
+
mayLog("#################################################");
|
|
174
190
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
175
191
|
const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db);
|
|
176
192
|
mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
|
|
@@ -178,7 +194,9 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
|
|
|
178
194
|
return twTvsObj;
|
|
179
195
|
}
|
|
180
196
|
mayLog("twTvsObj:", twTvsObj);
|
|
197
|
+
mayLog("#################################################");
|
|
181
198
|
mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
|
|
199
|
+
mayLog("#################################################");
|
|
182
200
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
183
201
|
ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
|
|
184
202
|
mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -83,7 +83,10 @@ async function getResult(q, ds) {
|
|
|
83
83
|
({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
|
|
84
84
|
} else if (q.dataType == PROTEOME_ABUNDANCE) {
|
|
85
85
|
;
|
|
86
|
-
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get(
|
|
86
|
+
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get({
|
|
87
|
+
..._q,
|
|
88
|
+
dataTypeDetails: _q.proteomeDetails
|
|
89
|
+
}));
|
|
87
90
|
} else {
|
|
88
91
|
;
|
|
89
92
|
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries[q.dataType].get(_q, ds));
|
package/routes/termdb.config.js
CHANGED
|
@@ -203,6 +203,9 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
203
203
|
if (orgSrc.overlayTerm) {
|
|
204
204
|
q2.proteome.organisms[organism].overlayTerm = JSON.parse(JSON.stringify(orgSrc.overlayTerm));
|
|
205
205
|
}
|
|
206
|
+
if (orgSrc.genomeName) {
|
|
207
|
+
q2.proteome.organisms[organism].genomeName = orgSrc.genomeName;
|
|
208
|
+
}
|
|
206
209
|
if (orgSrc.assays) {
|
|
207
210
|
q2.proteome.organisms[organism].assays = {};
|
|
208
211
|
for (const assay in orgSrc.assays) {
|
|
@@ -222,6 +225,9 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
222
225
|
JSON.stringify(src.caseFilter)
|
|
223
226
|
);
|
|
224
227
|
}
|
|
228
|
+
if (src.DAPfile) {
|
|
229
|
+
q2.proteome.organisms[organism].assays[assay].cohorts[cohort].DAPfile = true;
|
|
230
|
+
}
|
|
225
231
|
}
|
|
226
232
|
}
|
|
227
233
|
}
|
package/routes/termdb.dapVolcano.js
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import { dapVolcanoPayload } from "#types/checkers";
|
|
4
|
+
import { get_ds_tdb } from "#src/termdb.js";
|
|
5
|
+
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
6
|
+
import serverconfig from "../src/serverconfig.js";
|
|
7
|
+
import { countDistinctSamples } from "./termdb.proteome.ts";
|
|
8
|
+
const api = {
|
|
9
|
+
endpoint: "termdb/dapVolcano",
|
|
10
|
+
methods: {
|
|
11
|
+
get: {
|
|
12
|
+
...dapVolcanoPayload,
|
|
13
|
+
init
|
|
14
|
+
},
|
|
15
|
+
post: {
|
|
16
|
+
...dapVolcanoPayload,
|
|
17
|
+
init
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
};
|
|
21
|
+
function init({ genomes }) {
|
|
22
|
+
return async (req, res) => {
|
|
23
|
+
try {
|
|
24
|
+
const q = req.query;
|
|
25
|
+
const genome = genomes[q.genome];
|
|
26
|
+
if (!genome) throw "invalid genome";
|
|
27
|
+
const [ds] = get_ds_tdb(genome, q);
|
|
28
|
+
const proteomeConfig = ds.queries?.proteome;
|
|
29
|
+
if (!proteomeConfig) throw "proteome not configured for this dataset";
|
|
30
|
+
const organismConfig = proteomeConfig.organisms?.[q.organism];
|
|
31
|
+
if (!organismConfig) throw "invalid organism";
|
|
32
|
+
const assayConfig = organismConfig.assays?.[q.assay];
|
|
33
|
+
if (!assayConfig) throw "invalid assay";
|
|
34
|
+
const cohortConfig = assayConfig.cohorts?.[q.cohort];
|
|
35
|
+
if (!cohortConfig) throw "invalid cohort";
|
|
36
|
+
if (!cohortConfig.DAPfile) throw "DAP file not configured for this cohort";
|
|
37
|
+
const organismFilter = [{ columnIdx: organismConfig.columnIdx, columnValue: organismConfig.columnValue }];
|
|
38
|
+
const assayFilter = [{ columnIdx: assayConfig.columnIdx, columnValue: assayConfig.columnValue }];
|
|
39
|
+
const db = proteomeConfig.db;
|
|
40
|
+
const controlCount = countDistinctSamples(db, [...organismFilter, ...assayFilter, ...cohortConfig.controlFilter]);
|
|
41
|
+
const caseCount = countDistinctSamples(db, [...organismFilter, ...assayFilter, ...cohortConfig.caseFilter]);
|
|
42
|
+
if (q.countsOnly) {
|
|
43
|
+
res.send({ sample_size1: controlCount, sample_size2: caseCount });
|
|
44
|
+
return;
|
|
45
|
+
}
|
|
46
|
+
const filePath = path.join(serverconfig.tpmasterdir, cohortConfig.DAPfile);
|
|
47
|
+
const content = await fs.readFile(filePath, "utf8");
|
|
48
|
+
const lines = content.trim().split("\n");
|
|
49
|
+
const rustRows = [];
|
|
50
|
+
for (let i = 1; i < lines.length; i++) {
|
|
51
|
+
const parts = lines[i].split(" ");
|
|
52
|
+
if (parts.length < 4) continue;
|
|
53
|
+
const fc = Number(parts[2]);
|
|
54
|
+
if (!Number.isFinite(fc)) continue;
|
|
55
|
+
const pValue = Number(parts[3]);
|
|
56
|
+
if (!Number.isFinite(pValue)) continue;
|
|
57
|
+
rustRows.push({
|
|
58
|
+
gene_name: parts[0],
|
|
59
|
+
gene: parts[1],
|
|
60
|
+
fold_change: fc,
|
|
61
|
+
original_p_value: pValue,
|
|
62
|
+
adjusted_p_value: pValue
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
const rendered = await renderVolcano(rustRows, q.volcanoRender);
|
|
66
|
+
for (const d of rendered.dots) delete d.adjusted_p_value;
|
|
67
|
+
res.send({
|
|
68
|
+
data: rendered,
|
|
69
|
+
sample_size1: controlCount,
|
|
70
|
+
sample_size2: caseCount
|
|
71
|
+
});
|
|
72
|
+
} catch (e) {
|
|
73
|
+
res.send({ status: "error", error: e.message || e });
|
|
74
|
+
if (e instanceof Error && e.stack) console.log(e);
|
|
75
|
+
}
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
export {
|
|
79
|
+
api
|
|
80
|
+
};
|
package/routes/termdb.diffMeth.js
CHANGED
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
import { diffMethPayload } from "#types/checkers";
|
|
2
|
-
import { getData } from "../src/termdb.matrix.js";
|
|
3
|
-
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
4
|
-
import { mayLog } from "#src/helpers.ts";
|
|
5
|
-
import { formatElapsedTime } from "#shared";
|
|
6
2
|
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
3
|
+
import { readCacheFileOrRecompute, resolveDaContext, resolveDmSampleGroups } from "../src/diffAnalysis.ts";
|
|
7
4
|
const api = {
|
|
8
5
|
endpoint: "termdb/diffMeth",
|
|
9
6
|
methods: {
|
|
@@ -21,173 +18,38 @@ function init({ genomes }) {
|
|
|
21
18
|
return async (req, res) => {
|
|
22
19
|
try {
|
|
23
20
|
const q = req.query;
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
const
|
|
39
|
-
if (
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
if ("totalRows" in results.data && results.data.totalRows === 0)
|
|
21
|
+
if (q.preAnalysis) {
|
|
22
|
+
const { ds, term_results, term_results2 } = await resolveDaContext(q, genomes);
|
|
23
|
+
const groups = resolveDmSampleGroups(q, ds, term_results, term_results2);
|
|
24
|
+
const group1Name = q.samplelst.groups[0].name;
|
|
25
|
+
const group2Name = q.samplelst.groups[1].name;
|
|
26
|
+
res.send({
|
|
27
|
+
data: {
|
|
28
|
+
[group1Name]: groups.group1names.length,
|
|
29
|
+
[group2Name]: groups.group2names.length,
|
|
30
|
+
...groups.alerts.length ? { alert: groups.alerts.join(" | ") } : {}
|
|
31
|
+
}
|
|
32
|
+
});
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
const result = await readCacheFileOrRecompute({ daRequest: q, genomes });
|
|
36
|
+
if (result.kind !== "DM") throw new Error("expected DM result from readCacheFileOrRecompute");
|
|
37
|
+
const rendered = await renderVolcano(result.promoterData, q.volcanoRender);
|
|
38
|
+
rendered.cacheId = result.cacheId;
|
|
39
|
+
if (rendered.totalRows === 0)
|
|
44
40
|
throw new Error("No promoters passed filtering. Try relaxing group criteria or selecting more samples.");
|
|
45
|
-
|
|
41
|
+
const output = {
|
|
42
|
+
data: rendered,
|
|
43
|
+
sample_size1: result.sample_size1,
|
|
44
|
+
sample_size2: result.sample_size2
|
|
45
|
+
};
|
|
46
|
+
res.send(output);
|
|
46
47
|
} catch (e) {
|
|
47
48
|
res.send({ status: "error", error: e.message || e });
|
|
48
49
|
if (e instanceof Error && e.stack) console.log(e);
|
|
49
50
|
}
|
|
50
51
|
};
|
|
51
52
|
}
|
|
52
|
-
async function run_diffMeth(param, ds, term_results, term_results2) {
|
|
53
|
-
if (param.samplelst?.groups?.length != 2)
|
|
54
|
-
throw new Error("Exactly 2 sample groups are required for differential methylation analysis.");
|
|
55
|
-
if (param.samplelst.groups[0].values?.length < 1)
|
|
56
|
-
throw new Error("Group 1 has no samples. Please select at least one sample.");
|
|
57
|
-
if (param.samplelst.groups[1].values?.length < 1)
|
|
58
|
-
throw new Error("Group 2 has no samples. Please select at least one sample.");
|
|
59
|
-
const q = ds.queries.dnaMethylation?.promoter;
|
|
60
|
-
if (!q) throw new Error("This dataset does not have promoter-level methylation data configured.");
|
|
61
|
-
if (!q.file) throw new Error("Promoter methylation data file is not configured for this dataset.");
|
|
62
|
-
const group1names = [];
|
|
63
|
-
const conf1_group1 = [];
|
|
64
|
-
const conf2_group1 = [];
|
|
65
|
-
for (const s of param.samplelst.groups[0].values) {
|
|
66
|
-
if (!Number.isInteger(s.sampleId)) continue;
|
|
67
|
-
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
68
|
-
if (!n) continue;
|
|
69
|
-
if (!q.allSampleSet.has(n)) continue;
|
|
70
|
-
if (param.tw && param.tw2) {
|
|
71
|
-
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
72
|
-
conf1_group1.push(
|
|
73
|
-
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
74
|
-
);
|
|
75
|
-
conf2_group1.push(
|
|
76
|
-
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
77
|
-
);
|
|
78
|
-
group1names.push(n);
|
|
79
|
-
}
|
|
80
|
-
} else if (param.tw && !param.tw2) {
|
|
81
|
-
if (term_results.samples[s.sampleId]) {
|
|
82
|
-
conf1_group1.push(
|
|
83
|
-
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
84
|
-
);
|
|
85
|
-
group1names.push(n);
|
|
86
|
-
}
|
|
87
|
-
} else if (!param.tw && param.tw2) {
|
|
88
|
-
if (term_results2.samples[s.sampleId]) {
|
|
89
|
-
conf2_group1.push(
|
|
90
|
-
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
91
|
-
);
|
|
92
|
-
group1names.push(n);
|
|
93
|
-
}
|
|
94
|
-
} else {
|
|
95
|
-
group1names.push(n);
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
const group2names = [];
|
|
99
|
-
const conf1_group2 = [];
|
|
100
|
-
const conf2_group2 = [];
|
|
101
|
-
for (const s of param.samplelst.groups[1].values) {
|
|
102
|
-
if (!Number.isInteger(s.sampleId)) continue;
|
|
103
|
-
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
104
|
-
if (!n) continue;
|
|
105
|
-
if (!q.allSampleSet.has(n)) continue;
|
|
106
|
-
if (param.tw && param.tw2) {
|
|
107
|
-
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
108
|
-
conf1_group2.push(
|
|
109
|
-
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
110
|
-
);
|
|
111
|
-
conf2_group2.push(
|
|
112
|
-
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
113
|
-
);
|
|
114
|
-
group2names.push(n);
|
|
115
|
-
}
|
|
116
|
-
} else if (param.tw && !param.tw2) {
|
|
117
|
-
if (term_results.samples[s.sampleId]) {
|
|
118
|
-
conf1_group2.push(
|
|
119
|
-
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
120
|
-
);
|
|
121
|
-
group2names.push(n);
|
|
122
|
-
}
|
|
123
|
-
} else if (!param.tw && param.tw2) {
|
|
124
|
-
if (term_results2.samples[s.sampleId]) {
|
|
125
|
-
conf2_group2.push(
|
|
126
|
-
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
127
|
-
);
|
|
128
|
-
group2names.push(n);
|
|
129
|
-
}
|
|
130
|
-
} else {
|
|
131
|
-
group2names.push(n);
|
|
132
|
-
}
|
|
133
|
-
}
|
|
134
|
-
const sample_size1 = group1names.length;
|
|
135
|
-
const sample_size2 = group2names.length;
|
|
136
|
-
const alerts = validateGroups(sample_size1, sample_size2, group1names, group2names);
|
|
137
|
-
if (param.preAnalysis) {
|
|
138
|
-
const group1Name = param.samplelst.groups[0].name;
|
|
139
|
-
const group2Name = param.samplelst.groups[1].name;
|
|
140
|
-
return {
|
|
141
|
-
data: {
|
|
142
|
-
[group1Name]: sample_size1,
|
|
143
|
-
[group2Name]: sample_size2,
|
|
144
|
-
...alerts.length ? { alert: alerts.join(" | ") } : {}
|
|
145
|
-
}
|
|
146
|
-
};
|
|
147
|
-
}
|
|
148
|
-
if (alerts.length) throw new Error(alerts.join(" | "));
|
|
149
|
-
const diffMethInput = {
|
|
150
|
-
case: group2names.join(","),
|
|
151
|
-
control: group1names.join(","),
|
|
152
|
-
input_file: q.file,
|
|
153
|
-
min_samples_per_group: param.min_samples_per_group
|
|
154
|
-
};
|
|
155
|
-
if (param.tw) {
|
|
156
|
-
diffMethInput.conf1 = [...conf1_group2, ...conf1_group1];
|
|
157
|
-
diffMethInput.conf1_mode = param.tw.q.mode;
|
|
158
|
-
if (new Set(diffMethInput.conf1).size === 1) {
|
|
159
|
-
throw new Error("Confounding variable 1 has only one value");
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
if (param.tw2) {
|
|
163
|
-
diffMethInput.conf2 = [...conf2_group2, ...conf2_group1];
|
|
164
|
-
diffMethInput.conf2_mode = param.tw2.q.mode;
|
|
165
|
-
if (new Set(diffMethInput.conf2).size === 1) {
|
|
166
|
-
throw new Error("Confounding variable 2 has only one value");
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
const time1 = Date.now();
|
|
170
|
-
const result = JSON.parse(await run_R("diffMeth.R", JSON.stringify(diffMethInput)));
|
|
171
|
-
mayLog("Time taken to run diffMeth:", formatElapsedTime(Date.now() - time1));
|
|
172
|
-
const rendered = await renderVolcano(result.promoter_data, param.volcanoRender);
|
|
173
|
-
const output = {
|
|
174
|
-
data: rendered,
|
|
175
|
-
sample_size1,
|
|
176
|
-
sample_size2
|
|
177
|
-
};
|
|
178
|
-
return output;
|
|
179
|
-
}
|
|
180
|
-
function validateGroups(sample_size1, sample_size2, group1names, group2names) {
|
|
181
|
-
const alerts = [];
|
|
182
|
-
if (sample_size1 < 1) alerts.push("No samples in group 1 have methylation data available.");
|
|
183
|
-
if (sample_size2 < 1) alerts.push("No samples in group 2 have methylation data available.");
|
|
184
|
-
const commonnames = group1names.filter((x) => group2names.includes(x));
|
|
185
|
-
if (commonnames.length)
|
|
186
|
-
alerts.push(
|
|
187
|
-
`${commonnames.length} sample(s) appear in both groups: ${commonnames.join(", ")}. Please remove duplicates.`
|
|
188
|
-
);
|
|
189
|
-
return alerts;
|
|
190
|
-
}
|
|
191
53
|
export {
|
|
192
54
|
api
|
|
193
55
|
};
|