@sjcrh/proteinpaint-server 2.187.0 → 2.188.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +3 -3
- package/package.json +6 -6
- package/routes/aiProjectAdmin.js +2 -0
- package/routes/aiProjectSelectedWSImages.js +83 -43
- package/routes/brainImaging.js +1 -1
- package/routes/deleteWSITileSelection.js +48 -15
- package/routes/genesetEnrichment.js +23 -0
- package/routes/saveWSIAnnotation.js +75 -34
- package/routes/termdb.chat3.js +77 -59
- package/routes/termdb.cluster.js +4 -1
- package/routes/termdb.config.js +3 -0
- package/routes/termdb.dapVolcano.js +80 -0
- package/routes/termdb.proteome.js +14 -7
- package/routes/termdb.sampleScatter.js +4 -0
- package/routes/termdb.singlecellSamples.js +11 -0
- package/routes/termdb.topVariablyExpressedGenes.js +16 -142
- package/src/app.js +3172 -1376
package/routes/termdb.chat3.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { ChatPayload } from "#types/checkers";
|
|
2
2
|
import { mayLog } from "#src/helpers.ts";
|
|
3
3
|
import { formatElapsedTime } from "#shared";
|
|
4
|
-
import { readJSONFile, parse_geneset_db } from "./chat/utils.ts";
|
|
4
|
+
import { readJSONFile, parse_geneset_db, getChatRelatedPlotTypes } from "./chat/utils.ts";
|
|
5
5
|
import { classifyQuery } from "./chat/classify1.ts";
|
|
6
6
|
import { classifyPlotType } from "./chat/plot.ts";
|
|
7
7
|
import { classifyNotPlot } from "./chat/classify2.ts";
|
|
@@ -11,6 +11,7 @@ import { getDsAllowedTermTypes } from "./termdb.config.ts";
|
|
|
11
11
|
import { phrase2entity } from "./chat/phrase2entity.ts";
|
|
12
12
|
import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
|
|
13
13
|
import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
|
|
14
|
+
import { answerDataQueries } from "./chat/dataQueries.ts";
|
|
14
15
|
import path from "path";
|
|
15
16
|
import fs from "fs";
|
|
16
17
|
import { resolveToPlotState } from "./chat/scaffold2state.ts";
|
|
@@ -31,9 +32,9 @@ function init({ genomes }) {
|
|
|
31
32
|
return async (req, res) => {
|
|
32
33
|
const q = req.query;
|
|
33
34
|
try {
|
|
34
|
-
const
|
|
35
|
-
if (!
|
|
36
|
-
const ds =
|
|
35
|
+
const genome = genomes[q.genome];
|
|
36
|
+
if (!genome) throw "invalid genome";
|
|
37
|
+
const ds = genome.datasets?.[q.dslabel];
|
|
37
38
|
if (!ds) throw "invalid dslabel";
|
|
38
39
|
const aiFilesDir = serverconfig.binpath + "/../../dataset/ai/" + q.dslabel;
|
|
39
40
|
let agentFiles = [];
|
|
@@ -49,14 +50,24 @@ function init({ genomes }) {
|
|
|
49
50
|
if (llm.provider !== "SJ" && llm.provider !== "ollama" && llm.provider !== "huggingface" && llm.provider !== "azure") {
|
|
50
51
|
throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
|
|
51
52
|
}
|
|
52
|
-
|
|
53
|
+
let rawFilter;
|
|
54
|
+
if (typeof q.filter === "string") {
|
|
55
|
+
try {
|
|
56
|
+
rawFilter = JSON.parse(q.filter);
|
|
57
|
+
} catch (e) {
|
|
58
|
+
throw new Error("Failed to parse filter JSON string: " + e);
|
|
59
|
+
}
|
|
60
|
+
} else {
|
|
61
|
+
rawFilter = q.filter;
|
|
62
|
+
}
|
|
53
63
|
const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
|
|
54
64
|
const lst = Array.isArray(filter.lst) ? filter.lst : [];
|
|
55
65
|
const cohortFilter = lst.find((item) => item.tag === "cohortFilter");
|
|
56
66
|
const cohortKey = cohortFilter ? cohortFilter.tvs.values[0].key : "";
|
|
57
|
-
const
|
|
58
|
-
const
|
|
59
|
-
const
|
|
67
|
+
const supportedPlotTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
|
|
68
|
+
const chatSupportedPlotTypes = getChatRelatedPlotTypes(supportedPlotTypes);
|
|
69
|
+
const genedb = serverconfig.tpmasterdir + "/" + genome.genedb.dbfile;
|
|
70
|
+
const allowedTermTypes = getDsAllowedTermTypes(ds);
|
|
60
71
|
const ai_output_json = await run_chat_pipeline(
|
|
61
72
|
q.prompt,
|
|
62
73
|
llm,
|
|
@@ -64,8 +75,9 @@ function init({ genomes }) {
|
|
|
64
75
|
genedb,
|
|
65
76
|
agentFiles,
|
|
66
77
|
aiFilesDir,
|
|
67
|
-
|
|
68
|
-
|
|
78
|
+
chatSupportedPlotTypes,
|
|
79
|
+
allowedTermTypes,
|
|
80
|
+
genome
|
|
69
81
|
// testing
|
|
70
82
|
);
|
|
71
83
|
mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
|
|
@@ -76,19 +88,19 @@ function init({ genomes }) {
|
|
|
76
88
|
}
|
|
77
89
|
};
|
|
78
90
|
}
|
|
79
|
-
async function run_chat_pipeline(
|
|
91
|
+
async function run_chat_pipeline(userPrompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedPlotTypes, allowedTermTypes, genome) {
|
|
80
92
|
if (!fs.existsSync(path.join(aiFilesDir, "main.json")))
|
|
81
93
|
throw "Main data file is not specified for dataset:" + ds.label;
|
|
82
94
|
const dataset_json = await readJSONFile(path.join(aiFilesDir, "main.json"));
|
|
83
95
|
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
84
|
-
const class_response = await classifyQuery(
|
|
96
|
+
const class_response = await classifyQuery(userPrompt, llm);
|
|
85
97
|
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
86
98
|
let ai_output_json;
|
|
87
|
-
if (class_response.type
|
|
99
|
+
if (class_response.type === "notplot") {
|
|
88
100
|
const time2 = (/* @__PURE__ */ new Date()).valueOf();
|
|
89
|
-
const notPlotResult = await classifyNotPlot(
|
|
101
|
+
const notPlotResult = await classifyNotPlot(userPrompt, llm, agentFiles, aiFilesDir);
|
|
90
102
|
mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
|
|
91
|
-
if (notPlotResult.type
|
|
103
|
+
if (notPlotResult.type === "html") {
|
|
92
104
|
ai_output_json = notPlotResult;
|
|
93
105
|
} else {
|
|
94
106
|
ai_output_json = {
|
|
@@ -96,70 +108,72 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
|
|
|
96
108
|
text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
|
|
97
109
|
};
|
|
98
110
|
}
|
|
99
|
-
} else if (class_response.type
|
|
111
|
+
} else if (class_response.type === "binaryQuery") {
|
|
112
|
+
const answer = await answerDataQueries(userPrompt, llm, allowedTermTypes);
|
|
113
|
+
if (!answer) throw "Couldn't decide if this is data related query!";
|
|
114
|
+
mayLog("Data Binary Query: ", answer);
|
|
115
|
+
ai_output_json = answer;
|
|
116
|
+
} else if (class_response.type === "plot") {
|
|
100
117
|
let time = (/* @__PURE__ */ new Date()).valueOf();
|
|
101
|
-
const plotType = await classifyPlotType(
|
|
118
|
+
const plotType = await classifyPlotType(userPrompt, llm);
|
|
102
119
|
mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));
|
|
103
|
-
if (!
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
const errorResponse = {
|
|
120
|
+
if (!supportedPlotTypes.includes(plotType)) {
|
|
121
|
+
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
122
|
+
ai_output_json = {
|
|
107
123
|
type: "text",
|
|
108
|
-
text:
|
|
124
|
+
text: log
|
|
109
125
|
};
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
if (plotType === "summary") {
|
|
113
|
-
if (!supportedChartTypes.includes("dictionary")) {
|
|
114
|
-
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
115
|
-
ai_output_json = {
|
|
116
|
-
type: "text",
|
|
117
|
-
text: log
|
|
118
|
-
};
|
|
119
|
-
mayLog(log);
|
|
120
|
-
return ai_output_json;
|
|
121
|
-
}
|
|
122
|
-
} else if (plotType === "dge") {
|
|
123
|
-
if (!supportedChartTypes.includes("DA")) {
|
|
124
|
-
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
125
|
-
ai_output_json = {
|
|
126
|
-
type: "text",
|
|
127
|
-
text: log
|
|
128
|
-
};
|
|
129
|
-
mayLog(log);
|
|
130
|
-
return ai_output_json;
|
|
131
|
-
}
|
|
132
|
-
} else {
|
|
133
|
-
mayLog(`Supported chart types for this cohort: ${supportedChartTypes}`);
|
|
134
|
-
if (!supportedChartTypes.includes(plotType)) {
|
|
135
|
-
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
136
|
-
ai_output_json = {
|
|
137
|
-
type: "text",
|
|
138
|
-
text: log
|
|
139
|
-
};
|
|
140
|
-
mayLog(log);
|
|
141
|
-
return ai_output_json;
|
|
142
|
-
}
|
|
126
|
+
mayLog(log);
|
|
127
|
+
return ai_output_json;
|
|
143
128
|
}
|
|
129
|
+
const genes_list = await parse_geneset_db(genedb);
|
|
130
|
+
mayLog("#################################################");
|
|
144
131
|
mayLog("####### First phase: Infer Plot Scaffolds #######");
|
|
132
|
+
mayLog("#################################################");
|
|
145
133
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
146
|
-
const
|
|
134
|
+
const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
|
|
135
|
+
const scaffoldResult = await inferScaffold(
|
|
136
|
+
userPrompt,
|
|
137
|
+
plotType,
|
|
138
|
+
llm,
|
|
139
|
+
genome,
|
|
140
|
+
genes_list,
|
|
141
|
+
allowedTermTypes,
|
|
142
|
+
dataset_json,
|
|
143
|
+
ds,
|
|
144
|
+
dataset_db
|
|
145
|
+
);
|
|
147
146
|
mayLog("ScaffoldResult: ", scaffoldResult);
|
|
147
|
+
if (plotType === "hiercluster" && "plot" in scaffoldResult && scaffoldResult.type === "plot" || "text" in scaffoldResult && scaffoldResult.type === "text") {
|
|
148
|
+
return scaffoldResult;
|
|
149
|
+
}
|
|
148
150
|
mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
|
|
149
151
|
if (!scaffoldResult)
|
|
150
152
|
throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
|
|
153
|
+
if ("type" in scaffoldResult && scaffoldResult.type === "text") {
|
|
154
|
+
return scaffoldResult;
|
|
155
|
+
}
|
|
151
156
|
const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;
|
|
157
|
+
mayLog("#################################################");
|
|
152
158
|
mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
|
|
153
|
-
|
|
159
|
+
mayLog("#################################################");
|
|
154
160
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
155
|
-
const phrase2entityResult = await phrase2entity(
|
|
161
|
+
const phrase2entityResult = await phrase2entity(
|
|
162
|
+
scaffoldResult,
|
|
163
|
+
plotType,
|
|
164
|
+
llm,
|
|
165
|
+
genes_list,
|
|
166
|
+
dataset_json,
|
|
167
|
+
ds
|
|
168
|
+
);
|
|
156
169
|
mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
|
|
157
170
|
if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
|
|
158
171
|
return phrase2entityResult;
|
|
159
172
|
}
|
|
160
173
|
mayLog(phrase2entityResult);
|
|
174
|
+
mayLog("#################################################");
|
|
161
175
|
mayLog("####### Third phase: From Entities infer Term Objects #######");
|
|
162
|
-
|
|
176
|
+
mayLog("#################################################");
|
|
163
177
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
164
178
|
const termObj = await inferTermObjFromEntity(
|
|
165
179
|
phrase2entityResult,
|
|
@@ -170,7 +184,9 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
|
|
|
170
184
|
);
|
|
171
185
|
mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
|
|
172
186
|
mayLog("Inferred termObj from entity:", JSON.stringify(termObj));
|
|
187
|
+
mayLog("#################################################");
|
|
173
188
|
mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
|
|
189
|
+
mayLog("#################################################");
|
|
174
190
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
175
191
|
const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db);
|
|
176
192
|
mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
|
|
@@ -178,7 +194,9 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
|
|
|
178
194
|
return twTvsObj;
|
|
179
195
|
}
|
|
180
196
|
mayLog("twTvsObj:", twTvsObj);
|
|
197
|
+
mayLog("#################################################");
|
|
181
198
|
mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
|
|
199
|
+
mayLog("#################################################");
|
|
182
200
|
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
183
201
|
ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
|
|
184
202
|
mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -83,7 +83,10 @@ async function getResult(q, ds) {
|
|
|
83
83
|
({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
|
|
84
84
|
} else if (q.dataType == PROTEOME_ABUNDANCE) {
|
|
85
85
|
;
|
|
86
|
-
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get(
|
|
86
|
+
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get({
|
|
87
|
+
..._q,
|
|
88
|
+
dataTypeDetails: _q.proteomeDetails
|
|
89
|
+
}));
|
|
87
90
|
} else {
|
|
88
91
|
;
|
|
89
92
|
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries[q.dataType].get(_q, ds));
|
package/routes/termdb.config.js
CHANGED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import { dapVolcanoPayload } from "#types/checkers";
|
|
4
|
+
import { get_ds_tdb } from "#src/termdb.js";
|
|
5
|
+
import { renderVolcano } from "../src/renderVolcano.ts";
|
|
6
|
+
import serverconfig from "../src/serverconfig.js";
|
|
7
|
+
import { countDistinctSamples } from "./termdb.proteome.ts";
|
|
8
|
+
const api = {
|
|
9
|
+
endpoint: "termdb/dapVolcano",
|
|
10
|
+
methods: {
|
|
11
|
+
get: {
|
|
12
|
+
...dapVolcanoPayload,
|
|
13
|
+
init
|
|
14
|
+
},
|
|
15
|
+
post: {
|
|
16
|
+
...dapVolcanoPayload,
|
|
17
|
+
init
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
};
|
|
21
|
+
function init({ genomes }) {
|
|
22
|
+
return async (req, res) => {
|
|
23
|
+
try {
|
|
24
|
+
const q = req.query;
|
|
25
|
+
const genome = genomes[q.genome];
|
|
26
|
+
if (!genome) throw "invalid genome";
|
|
27
|
+
const [ds] = get_ds_tdb(genome, q);
|
|
28
|
+
const proteomeConfig = ds.queries?.proteome;
|
|
29
|
+
if (!proteomeConfig) throw "proteome not configured for this dataset";
|
|
30
|
+
const organismConfig = proteomeConfig.organisms?.[q.organism];
|
|
31
|
+
if (!organismConfig) throw "invalid organism";
|
|
32
|
+
const assayConfig = organismConfig.assays?.[q.assay];
|
|
33
|
+
if (!assayConfig) throw "invalid assay";
|
|
34
|
+
const cohortConfig = assayConfig.cohorts?.[q.cohort];
|
|
35
|
+
if (!cohortConfig) throw "invalid cohort";
|
|
36
|
+
if (!cohortConfig.DAPfile) throw "DAP file not configured for this cohort";
|
|
37
|
+
const organismFilter = [{ columnIdx: organismConfig.columnIdx, columnValue: organismConfig.columnValue }];
|
|
38
|
+
const assayFilter = [{ columnIdx: assayConfig.columnIdx, columnValue: assayConfig.columnValue }];
|
|
39
|
+
const db = proteomeConfig.db;
|
|
40
|
+
const controlCount = countDistinctSamples(db, [...organismFilter, ...assayFilter, ...cohortConfig.controlFilter]);
|
|
41
|
+
const caseCount = countDistinctSamples(db, [...organismFilter, ...assayFilter, ...cohortConfig.caseFilter]);
|
|
42
|
+
if (q.countsOnly) {
|
|
43
|
+
res.send({ sample_size1: controlCount, sample_size2: caseCount });
|
|
44
|
+
return;
|
|
45
|
+
}
|
|
46
|
+
const filePath = path.join(serverconfig.tpmasterdir, cohortConfig.DAPfile);
|
|
47
|
+
const content = await fs.readFile(filePath, "utf8");
|
|
48
|
+
const lines = content.trim().split("\n");
|
|
49
|
+
const rustRows = [];
|
|
50
|
+
for (let i = 1; i < lines.length; i++) {
|
|
51
|
+
const parts = lines[i].split(" ");
|
|
52
|
+
if (parts.length < 4) continue;
|
|
53
|
+
const fc = Number(parts[2]);
|
|
54
|
+
if (!Number.isFinite(fc)) continue;
|
|
55
|
+
const pValue = Number(parts[3]);
|
|
56
|
+
if (!Number.isFinite(pValue)) continue;
|
|
57
|
+
rustRows.push({
|
|
58
|
+
gene_name: parts[0],
|
|
59
|
+
gene: parts[1],
|
|
60
|
+
fold_change: fc,
|
|
61
|
+
original_p_value: pValue,
|
|
62
|
+
adjusted_p_value: pValue
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
const rendered = await renderVolcano(rustRows, q.volcanoRender);
|
|
66
|
+
for (const d of rendered.dots) delete d.adjusted_p_value;
|
|
67
|
+
res.send({
|
|
68
|
+
data: rendered,
|
|
69
|
+
sample_size1: controlCount,
|
|
70
|
+
sample_size2: caseCount
|
|
71
|
+
});
|
|
72
|
+
} catch (e) {
|
|
73
|
+
res.send({ status: "error", error: e.message || e });
|
|
74
|
+
if (e instanceof Error && e.stack) console.log(e);
|
|
75
|
+
}
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
export {
|
|
79
|
+
api
|
|
80
|
+
};
|
|
@@ -42,12 +42,12 @@ function init({ genomes }) {
|
|
|
42
42
|
term: {
|
|
43
43
|
name: term.name,
|
|
44
44
|
type: "proteomeAbundance",
|
|
45
|
-
|
|
45
|
+
dataTypeDetails: details
|
|
46
46
|
}
|
|
47
47
|
};
|
|
48
48
|
const cohortData = await ds.queries.proteome.get({
|
|
49
49
|
terms: [tw],
|
|
50
|
-
|
|
50
|
+
dataTypeDetails: details,
|
|
51
51
|
filter: q.filter,
|
|
52
52
|
filter0: q.filter0,
|
|
53
53
|
for: "proteinView",
|
|
@@ -249,7 +249,7 @@ async function validate_query_proteome(ds) {
|
|
|
249
249
|
const proteins = arg?.proteins;
|
|
250
250
|
if (!Array.isArray(proteins) || proteins.length == 0) throw "queries.proteome.find arg.proteins[] missing";
|
|
251
251
|
const matches = /* @__PURE__ */ new Set();
|
|
252
|
-
const details = arg?.
|
|
252
|
+
const details = arg?.dataTypeDetails || {};
|
|
253
253
|
const organism = details.organism;
|
|
254
254
|
const assay = details.assay;
|
|
255
255
|
const cohort = details.cohort;
|
|
@@ -257,7 +257,7 @@ async function validate_query_proteome(ds) {
|
|
|
257
257
|
const filters = [];
|
|
258
258
|
if (Object.keys(details).length) {
|
|
259
259
|
if (!organism || !assay || !cohort)
|
|
260
|
-
throw "queries.proteome.find arg.
|
|
260
|
+
throw "queries.proteome.find arg.dataTypeDetails.{organism,assay,cohort} missing";
|
|
261
261
|
const organismConfig = q.organisms?.[organism];
|
|
262
262
|
if (!organismConfig) throw `queries.proteome.find invalid organism: ${organism}`;
|
|
263
263
|
const assayConfig = organismConfig.assays?.[assay];
|
|
@@ -300,8 +300,8 @@ async function validate_query_proteome(ds) {
|
|
|
300
300
|
};
|
|
301
301
|
q.get = async (param) => {
|
|
302
302
|
if (!param?.terms?.length) throw "queries.proteome.get param.terms[] missing";
|
|
303
|
-
if (!param.
|
|
304
|
-
throw "queries.proteome.get param.
|
|
303
|
+
if (!param.dataTypeDetails?.assay || !param.dataTypeDetails?.cohort || !param.dataTypeDetails?.organism)
|
|
304
|
+
throw "queries.proteome.get param.dataTypeDetails.{assay,cohort,organism} missing";
|
|
305
305
|
return await getProteomeValuesFromCohort(ds, param, q);
|
|
306
306
|
};
|
|
307
307
|
}
|
|
@@ -329,6 +329,12 @@ function buildFilterClause(filters) {
|
|
|
329
329
|
}
|
|
330
330
|
return { conditions, params };
|
|
331
331
|
}
|
|
332
|
+
function countDistinctSamples(db, filters) {
|
|
333
|
+
if (!filters?.length) throw "countDistinctSamples: filters must not be empty";
|
|
334
|
+
const { conditions, params } = buildFilterClause(filters);
|
|
335
|
+
const row = db.prepare(`SELECT COUNT(DISTINCT sample) as cnt FROM proteome_abundance WHERE ${conditions.join(" AND ")}`).get(...params);
|
|
336
|
+
return row?.cnt || 0;
|
|
337
|
+
}
|
|
332
338
|
function queryDbRows(db, matchColumn, matchValue, filters) {
|
|
333
339
|
const { conditions, params } = buildFilterClause(filters);
|
|
334
340
|
const allConditions = [`${matchColumn} = ? COLLATE NOCASE`, ...conditions];
|
|
@@ -339,7 +345,7 @@ function queryDbRows(db, matchColumn, matchValue, filters) {
|
|
|
339
345
|
}
|
|
340
346
|
async function getProteomeValuesFromCohort(ds, param, q) {
|
|
341
347
|
const db = ds.queries.proteome.db;
|
|
342
|
-
const { assay, cohort, organism } = param.
|
|
348
|
+
const { assay, cohort, organism } = param.dataTypeDetails;
|
|
343
349
|
const organismConfig = q.organisms?.[organism];
|
|
344
350
|
if (!organismConfig) throw `queries.proteome invalid organism: ${organism}`;
|
|
345
351
|
const organismColumnIdx = organismConfig.columnIdx;
|
|
@@ -453,5 +459,6 @@ async function getProteomeValuesFromCohort(ds, param, q) {
|
|
|
453
459
|
}
|
|
454
460
|
export {
|
|
455
461
|
api,
|
|
462
|
+
countDistinctSamples,
|
|
456
463
|
validate_query_proteome
|
|
457
464
|
};
|
|
@@ -53,6 +53,10 @@ function init({ genomes }) {
|
|
|
53
53
|
cohortSamples = tmp[0];
|
|
54
54
|
} else {
|
|
55
55
|
if (!q.plotName) throw new Error("Neither plot name or coordinates where provided");
|
|
56
|
+
if (typeof ds.cohort?.scatterplots?.get == "function") {
|
|
57
|
+
const allowed = ds.cohort.scatterplots.get(q.__protected__?.clientAuthResult);
|
|
58
|
+
if (!allowed?.find((i) => i.name == q.plotName)) throw new Error("No permission to display plot");
|
|
59
|
+
}
|
|
56
60
|
if (!Array.isArray(ds.cohort?.scatterplots?.plots)) throw new Error("not supported");
|
|
57
61
|
const plot = ds.cohort.scatterplots.plots.find((p) => p.name == q.plotName);
|
|
58
62
|
if (!plot) throw new Error(`plot not found with plotName ${q.plotName}`);
|
|
@@ -89,6 +89,17 @@ async function validateSamples(q, ds) {
|
|
|
89
89
|
const S = q.samples, D = q.data;
|
|
90
90
|
const samples = /* @__PURE__ */ new Map();
|
|
91
91
|
for (const plot of D.plots) {
|
|
92
|
+
if (plot.isMetaResult) {
|
|
93
|
+
const sampleName = plot?.sampleId || plot.name.replace(/\s/g, "_");
|
|
94
|
+
const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sampleName + (plot.fileSuffix || ""));
|
|
95
|
+
try {
|
|
96
|
+
await file_is_readable(tsvfile);
|
|
97
|
+
samples.set(sampleName, { sample: sampleName, isMetaResult: true });
|
|
98
|
+
} catch (e) {
|
|
99
|
+
throw new Error(`meta result data file missing or unreadable: ${sampleName} (${tsvfile}): ${e.message || e}`);
|
|
100
|
+
}
|
|
101
|
+
continue;
|
|
102
|
+
}
|
|
92
103
|
for (const fn of await fs.promises.readdir(path.join(serverconfig.tpmasterdir, plot.folder))) {
|
|
93
104
|
let sampleName = fn;
|
|
94
105
|
if (plot.fileSuffix) {
|
|
@@ -1,11 +1,7 @@
|
|
|
1
1
|
import { termdbTopVariablyExpressedGenesPayload } from "#types/checkers";
|
|
2
|
-
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
3
|
-
import serverconfig from "#src/serverconfig.js";
|
|
4
2
|
import { mayLimitSamples } from "#src/mds3.filter.js";
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
7
|
-
import { joinUrl } from "#shared/joinUrl.js";
|
|
8
|
-
import { formatElapsedTime } from "#shared/time.js";
|
|
3
|
+
import { run_python } from "@sjcrh/proteinpaint-python";
|
|
4
|
+
import { mayLog } from "#src/helpers.ts";
|
|
9
5
|
const api = {
|
|
10
6
|
endpoint: "termdb/topVariablyExpressedGenes",
|
|
11
7
|
methods: {
|
|
@@ -29,27 +25,23 @@ function init({ genomes }) {
|
|
|
29
25
|
const ds = genome.datasets?.[q.dslabel];
|
|
30
26
|
if (!ds) throw "invalid dslabel";
|
|
31
27
|
if (!ds.queries?.topVariablyExpressedGenes) throw "not supported on dataset";
|
|
28
|
+
q.ds = ds;
|
|
32
29
|
const t = Date.now();
|
|
33
30
|
result = {
|
|
34
31
|
genes: await ds.queries.topVariablyExpressedGenes.getGenes(q)
|
|
35
32
|
};
|
|
36
|
-
|
|
33
|
+
mayLog("time for top variably expressed genes", Date.now() - t);
|
|
37
34
|
} catch (e) {
|
|
38
35
|
result = { status: e.status || 400, error: e.message || e };
|
|
39
36
|
}
|
|
40
37
|
res.send(result);
|
|
41
38
|
};
|
|
42
39
|
}
|
|
43
|
-
function validate_query_TopVariablyExpressedGenes(ds
|
|
40
|
+
function validate_query_TopVariablyExpressedGenes(ds) {
|
|
44
41
|
const q = ds.queries.topVariablyExpressedGenes;
|
|
45
42
|
if (!q) return;
|
|
46
|
-
if (q.
|
|
47
|
-
|
|
48
|
-
} else if (q.src == "native") {
|
|
49
|
-
nativeValidateQuery(ds);
|
|
50
|
-
} else {
|
|
51
|
-
throw "unknown topVariablyExpressedGenes.src";
|
|
52
|
-
}
|
|
43
|
+
if (typeof q.getGenes == "function") return;
|
|
44
|
+
nativeValidateQuery(ds);
|
|
53
45
|
}
|
|
54
46
|
function nativeValidateQuery(ds) {
|
|
55
47
|
const gE = ds.queries.geneExpression;
|
|
@@ -83,21 +75,7 @@ function addTopVEarg(q) {
|
|
|
83
75
|
id: "filter_extreme_values",
|
|
84
76
|
label: "Filter Extreme Values",
|
|
85
77
|
type: "boolean",
|
|
86
|
-
value: true
|
|
87
|
-
options: [
|
|
88
|
-
{
|
|
89
|
-
id: "min_count",
|
|
90
|
-
label: "Min count",
|
|
91
|
-
type: "number",
|
|
92
|
-
value: 10
|
|
93
|
-
},
|
|
94
|
-
{
|
|
95
|
-
id: "min_total_count",
|
|
96
|
-
label: "Min total count",
|
|
97
|
-
type: "number",
|
|
98
|
-
value: 15
|
|
99
|
-
}
|
|
100
|
-
]
|
|
78
|
+
value: true
|
|
101
79
|
},
|
|
102
80
|
{
|
|
103
81
|
id: "rank_type",
|
|
@@ -134,124 +112,20 @@ function addTopVEarg(q) {
|
|
|
134
112
|
q.arguments = arglst;
|
|
135
113
|
}
|
|
136
114
|
async function computeGenes4nativeDs(q, gE, samples) {
|
|
115
|
+
if (!["number", "boolean"].includes(typeof q.filter_extreme_values) || q.filter_extreme_values === void 0) {
|
|
116
|
+
q.filter_extreme_values = false;
|
|
117
|
+
}
|
|
137
118
|
const input_json = {
|
|
138
119
|
input_file: gE.file,
|
|
139
120
|
samples: samples.join(","),
|
|
140
|
-
filter_extreme_values: q.filter_extreme_values,
|
|
141
|
-
|
|
142
|
-
rank_type: q.rank_type?.type
|
|
121
|
+
filter_extreme_values: typeof q.filter_extreme_values === "number" ? Boolean(q.filter_extreme_values) : q.filter_extreme_values,
|
|
122
|
+
max_genes: q.maxGenes,
|
|
123
|
+
rank_type: q.rank_type?.type ?? "var"
|
|
143
124
|
};
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
input_json["min_total_count"] = q.min_total_count;
|
|
147
|
-
}
|
|
148
|
-
if (gE.newformat) {
|
|
149
|
-
input_json["newformat"] = true;
|
|
150
|
-
}
|
|
151
|
-
const rust_output = await run_rust("topGeneByExpressionVariance", JSON.stringify(input_json));
|
|
152
|
-
const rust_output_list = rust_output.split("\n");
|
|
153
|
-
let output_json;
|
|
154
|
-
for (const item of rust_output_list) {
|
|
155
|
-
if (item.includes("output_json:")) {
|
|
156
|
-
output_json = JSON.parse(item.replace("output_json:", ""));
|
|
157
|
-
} else {
|
|
158
|
-
console.log(item);
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
const varGenes = output_json.map((i) => i.gene_symbol);
|
|
125
|
+
const python_output = await run_python("topVEgene.py", JSON.stringify(input_json));
|
|
126
|
+
const varGenes = typeof python_output === "string" ? JSON.parse(python_output) : [];
|
|
162
127
|
return varGenes;
|
|
163
128
|
}
|
|
164
|
-
function gdcValidateQuery(ds, genome) {
|
|
165
|
-
ds.queries.topVariablyExpressedGenes.getGenes = async (q) => {
|
|
166
|
-
if (serverconfig.features.gdcGenes) {
|
|
167
|
-
console.error(
|
|
168
|
-
"!!GDC!! using serverconfig.features.gdcGenes[] but not live api query. only use this on DEV and never on PROD!"
|
|
169
|
-
);
|
|
170
|
-
return serverconfig.features.gdcGenes;
|
|
171
|
-
}
|
|
172
|
-
if (ds.label === "GDC" && !ds.__gdc?.doneCaching) {
|
|
173
|
-
throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
|
|
174
|
-
}
|
|
175
|
-
const { host, headers } = ds.getHostHeaders(q);
|
|
176
|
-
try {
|
|
177
|
-
const response = await cachedFetch(
|
|
178
|
-
joinUrl(host.rest, "/gene_expression/gene_selection"),
|
|
179
|
-
{
|
|
180
|
-
method: "POST",
|
|
181
|
-
headers,
|
|
182
|
-
body: getGeneSelectionArg(q)
|
|
183
|
-
},
|
|
184
|
-
{
|
|
185
|
-
// noCache: true, // !!! for testing only !!!
|
|
186
|
-
getErrMessage: (response2) => {
|
|
187
|
-
const body = response2?.body || response2;
|
|
188
|
-
return Array.isArray(body?.gene_selection) ? "" : body?.message || body?.error || JSON.stringify(body);
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
);
|
|
192
|
-
const re = response.body;
|
|
193
|
-
const genes = [];
|
|
194
|
-
if (!Array.isArray(re.gene_selection)) {
|
|
195
|
-
throw "re.gene_selection[] is not array: " + JSON.stringify(re);
|
|
196
|
-
}
|
|
197
|
-
for (const i of re.gene_selection) {
|
|
198
|
-
if (i.gene_id && typeof i.gene_id == "string") {
|
|
199
|
-
const t = genome.genedb.getNameByAlias.get(i.gene_id);
|
|
200
|
-
if (t) genes.push(t.name);
|
|
201
|
-
} else if (i.symbol && typeof i.symbol == "string") {
|
|
202
|
-
genes.push(i.symbol);
|
|
203
|
-
} else {
|
|
204
|
-
throw "one of re.gene_selection[] is missing both gene_id and symbol";
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
return genes;
|
|
208
|
-
} catch (e) {
|
|
209
|
-
console.error(e.stack || e);
|
|
210
|
-
throw e;
|
|
211
|
-
}
|
|
212
|
-
};
|
|
213
|
-
function getGeneSelectionArg(q) {
|
|
214
|
-
const arg = {
|
|
215
|
-
// add any to avoid tsc err
|
|
216
|
-
case_filters: makeFilter(q),
|
|
217
|
-
selection_size: q.maxGenes,
|
|
218
|
-
min_median_log2_uqfpkm: q.min_median_log2_uqfpkm
|
|
219
|
-
};
|
|
220
|
-
if (q.geneSet) {
|
|
221
|
-
if (q.geneSet.type == "all") {
|
|
222
|
-
arg.gene_type = "protein_coding";
|
|
223
|
-
} else if (q.geneSet.type == "custom" || q.geneSet.type == "msigdb") {
|
|
224
|
-
if (!Array.isArray(q.geneSet.geneList)) throw "q.geneSet.geneList is not array";
|
|
225
|
-
arg.gene_ids = map2ensg(q.geneSet.geneList, genome);
|
|
226
|
-
if (arg.gene_ids.length == 0) throw "no valid genes from custom gene set";
|
|
227
|
-
} else {
|
|
228
|
-
throw "unknown q.geneSet.type";
|
|
229
|
-
}
|
|
230
|
-
} else {
|
|
231
|
-
arg.gene_type = "protein_coding";
|
|
232
|
-
}
|
|
233
|
-
return arg;
|
|
234
|
-
}
|
|
235
|
-
}
|
|
236
|
-
function map2ensg(lst, genome) {
|
|
237
|
-
const ensg = [];
|
|
238
|
-
for (const name of lst) {
|
|
239
|
-
if (name.startsWith("ENSG") && name.length == 15) {
|
|
240
|
-
ensg.push(name);
|
|
241
|
-
continue;
|
|
242
|
-
}
|
|
243
|
-
const tmp = genome.genedb.getAliasByName.all(name);
|
|
244
|
-
if (Array.isArray(tmp)) {
|
|
245
|
-
for (const a of tmp) {
|
|
246
|
-
if (a.alias.startsWith("ENSG")) {
|
|
247
|
-
ensg.push(a.alias);
|
|
248
|
-
break;
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
return ensg;
|
|
254
|
-
}
|
|
255
129
|
export {
|
|
256
130
|
api,
|
|
257
131
|
validate_query_TopVariablyExpressedGenes
|