@sjcrh/proteinpaint-server 2.177.0 → 2.178.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +23 -2
- package/package.json +6 -6
- package/routes/grin2.js +25 -6
- package/routes/termdb.chat.js +101 -30
- package/routes/termdb.chat2.js +153 -0
- package/routes/termdb.cluster.js +10 -0
- package/routes/termdb.config.js +19 -4
- package/routes/termdb.diffMeth.js +180 -0
- package/routes/termdb.dmr.js +48 -0
- package/routes/termdb.sampleScatter.js +12 -2
- package/routes/termdb.singlecellSamples.js +40 -4
- package/src/app.js +2885 -692
- package/src/serverconfig.js +1 -1
package/dataset/termdb.test.js
CHANGED
|
@@ -210,18 +210,31 @@ function termdb_test_default() {
|
|
|
210
210
|
defaultTw4correlationPlot: {
|
|
211
211
|
disease: { id: "diaggrp", q: {} }
|
|
212
212
|
},
|
|
213
|
-
|
|
213
|
+
termCollections: [
|
|
214
214
|
{
|
|
215
215
|
name: "Fake Collection 1",
|
|
216
|
+
type: "numeric",
|
|
216
217
|
termIds: ["agedx", "a_death", "a_ndi", "agelastvisit"],
|
|
217
218
|
branchIds: ["Demographic Variables", "Age (years)"],
|
|
218
219
|
propsByTermId: {}
|
|
219
220
|
},
|
|
220
221
|
{
|
|
221
222
|
name: "Fake Collection 2",
|
|
223
|
+
type: "numeric",
|
|
222
224
|
termIds: ["a_death", "a_ndi", "agelastvisit"],
|
|
223
225
|
branchIds: ["Demographic Variables", "Age (years)"],
|
|
224
226
|
propsByTermId: {}
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
name: "Assay Availability",
|
|
230
|
+
type: "categorical",
|
|
231
|
+
categoryKeys: [
|
|
232
|
+
{ key: "1", shown: true },
|
|
233
|
+
{ key: "2", shown: true }
|
|
234
|
+
],
|
|
235
|
+
termIds: ["assayavailability_cnv", "assayavailability_fusion", "assayavailability_germline"],
|
|
236
|
+
branchIds: [""],
|
|
237
|
+
propsByTermId: {}
|
|
225
238
|
}
|
|
226
239
|
]
|
|
227
240
|
},
|
|
@@ -373,6 +386,14 @@ function termdb_test_default() {
|
|
|
373
386
|
ssGSEA: {
|
|
374
387
|
file: "files/hg38/TermdbTest/rnaseq/TermdbTest.ssgsea.h5"
|
|
375
388
|
},
|
|
389
|
+
dnaMethylation: {
|
|
390
|
+
file: "files/hg38/TermdbTest/dnaMeth.h5",
|
|
391
|
+
unit: "Average Beta Value",
|
|
392
|
+
promoter: {
|
|
393
|
+
file: "files/hg38/TermdbTest/dnaMethPromoterMvalue.h5",
|
|
394
|
+
unit: "M-value"
|
|
395
|
+
}
|
|
396
|
+
},
|
|
376
397
|
topVariablyExpressedGenes: {
|
|
377
398
|
src: "native"
|
|
378
399
|
},
|
|
@@ -417,7 +438,7 @@ function termdb_test_default() {
|
|
|
417
438
|
jsonFile: "files/hg38/TermdbTest/trackLst/facet.json",
|
|
418
439
|
activeTracks: ["bw 1", "bed 1"]
|
|
419
440
|
},
|
|
420
|
-
chat: {}
|
|
441
|
+
chat: { aifiles: "./proteinpaint/server/dataset/ai/termdb.test.json" }
|
|
421
442
|
}
|
|
422
443
|
};
|
|
423
444
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.177.0",
|
|
3
|
+
"version": "2.178.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -62,11 +62,11 @@
|
|
|
62
62
|
},
|
|
63
63
|
"dependencies": {
|
|
64
64
|
"@sjcrh/augen": "2.143.0",
|
|
65
|
-
"@sjcrh/proteinpaint-python": "2.
|
|
66
|
-
"@sjcrh/proteinpaint-r": "2.
|
|
67
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
68
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
69
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
65
|
+
"@sjcrh/proteinpaint-python": "2.178.0",
|
|
66
|
+
"@sjcrh/proteinpaint-r": "2.178.0",
|
|
67
|
+
"@sjcrh/proteinpaint-rust": "2.178.0",
|
|
68
|
+
"@sjcrh/proteinpaint-shared": "2.178.0",
|
|
69
|
+
"@sjcrh/proteinpaint-types": "2.178.0",
|
|
70
70
|
"@types/express": "^5.0.0",
|
|
71
71
|
"@types/express-session": "^1.18.1",
|
|
72
72
|
"better-sqlite3": "^12.4.1",
|
package/routes/grin2.js
CHANGED
|
@@ -8,6 +8,7 @@ import os from "os";
|
|
|
8
8
|
import { get_samples } from "#src/termdb.sql.js";
|
|
9
9
|
import { read_file, file_is_readable } from "#src/utils.js";
|
|
10
10
|
import { dtsnvindel, dtcnv, dtfusionrna, dtsv, dt2lesion, optionToDt, formatElapsedTime } from "#shared";
|
|
11
|
+
import { mayFilterByMaf } from "#src/mds3.init.js";
|
|
11
12
|
import crypto from "crypto";
|
|
12
13
|
import { promisify } from "node:util";
|
|
13
14
|
import { exec as execCallback } from "node:child_process";
|
|
@@ -32,6 +33,7 @@ const api = {
|
|
|
32
33
|
};
|
|
33
34
|
function init({ genomes }) {
|
|
34
35
|
return async (req, res) => {
|
|
36
|
+
const signal = req.query.__abortSignal;
|
|
35
37
|
try {
|
|
36
38
|
const request = req.query;
|
|
37
39
|
const g = genomes[request.genome];
|
|
@@ -39,9 +41,13 @@ function init({ genomes }) {
|
|
|
39
41
|
const ds = g.datasets?.[request.dslabel];
|
|
40
42
|
if (!ds) throw new Error("ds missing");
|
|
41
43
|
if (!ds.queries?.singleSampleMutation) throw new Error("singleSampleMutation query missing from dataset");
|
|
42
|
-
const result = await runGrin2WithLimit(g, ds, request);
|
|
44
|
+
const result = await runGrin2WithLimit(g, ds, request, signal);
|
|
43
45
|
res.json(result);
|
|
44
46
|
} catch (e) {
|
|
47
|
+
if (signal?.aborted) {
|
|
48
|
+
mayLog("[GRIN2] Analysis aborted due to client disconnect");
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
45
51
|
console.error("[GRIN2] Error stack:", e.stack);
|
|
46
52
|
const errorResponse = {
|
|
47
53
|
status: "error",
|
|
@@ -93,7 +99,7 @@ async function getMaxLesions() {
|
|
|
93
99
|
return MAX_LESIONS;
|
|
94
100
|
}
|
|
95
101
|
let activeGrin2Jobs = 0;
|
|
96
|
-
async function runGrin2WithLimit(g, ds, request) {
|
|
102
|
+
async function runGrin2WithLimit(g, ds, request, signal) {
|
|
97
103
|
if (activeGrin2Jobs >= GRIN2_CONCURRENCY_LIMIT) {
|
|
98
104
|
const error = new Error(
|
|
99
105
|
`GRIN2 analysis queue is full (${GRIN2_CONCURRENCY_LIMIT} concurrent analyses). Please try again in a few minutes.`
|
|
@@ -105,7 +111,7 @@ async function runGrin2WithLimit(g, ds, request) {
|
|
|
105
111
|
activeGrin2Jobs++;
|
|
106
112
|
mayLog(`[GRIN2] Starting analysis. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
|
|
107
113
|
try {
|
|
108
|
-
return await runGrin2(g, ds, request);
|
|
114
|
+
return await runGrin2(g, ds, request, signal);
|
|
109
115
|
} finally {
|
|
110
116
|
activeGrin2Jobs--;
|
|
111
117
|
mayLog(`[GRIN2] Analysis complete. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
|
|
@@ -136,7 +142,7 @@ function getCnvLesionType(isGain) {
|
|
|
136
142
|
}
|
|
137
143
|
return lesionType.lesionType;
|
|
138
144
|
}
|
|
139
|
-
async function runGrin2(g, ds, request) {
|
|
145
|
+
async function runGrin2(g, ds, request, signal) {
|
|
140
146
|
const startTime = Date.now();
|
|
141
147
|
const samples = await get_samples(
|
|
142
148
|
request,
|
|
@@ -176,7 +182,7 @@ async function runGrin2(g, ds, request) {
|
|
|
176
182
|
pyInput.chromosomelist[c] = g.majorchr[c];
|
|
177
183
|
}
|
|
178
184
|
const grin2AnalysisStart = Date.now();
|
|
179
|
-
const pyResult = await run_python("grin2PpWrapper.py", JSON.stringify(pyInput));
|
|
185
|
+
const pyResult = await run_python("grin2PpWrapper.py", JSON.stringify(pyInput), { signal });
|
|
180
186
|
if (pyResult.stderr?.trim()) {
|
|
181
187
|
mayLog(`[GRIN2] Python stderr: ${pyResult.stderr}`);
|
|
182
188
|
if (pyResult.stderr.includes("ERROR:")) {
|
|
@@ -201,7 +207,7 @@ async function runGrin2(g, ds, request) {
|
|
|
201
207
|
bin_size: request.binSize
|
|
202
208
|
};
|
|
203
209
|
const manhattanPlotStart = Date.now();
|
|
204
|
-
const rsResult = await run_rust("manhattan_plot", JSON.stringify(rustInput));
|
|
210
|
+
const rsResult = await run_rust("manhattan_plot", JSON.stringify(rustInput), [], { signal });
|
|
205
211
|
const manhattanPlotTime = Date.now() - manhattanPlotStart;
|
|
206
212
|
mayLog(`[GRIN2] Manhattan plot generation took ${formatElapsedTime(manhattanPlotTime)}`);
|
|
207
213
|
const manhattanPlotData = JSON.parse(rsResult);
|
|
@@ -431,6 +437,19 @@ function filterAndConvertSnvIndel(sampleName, entry, options) {
|
|
|
431
437
|
if (!Number.isInteger(entry.pos)) {
|
|
432
438
|
return null;
|
|
433
439
|
}
|
|
440
|
+
if (options.mafFilter?.lst?.length) {
|
|
441
|
+
if (!Array.isArray(entry.vafs)) return null;
|
|
442
|
+
const copy = { dt: dtsnvindel };
|
|
443
|
+
for (const v of entry.vafs) {
|
|
444
|
+
copy[v.id] = v.refCount + "," + v.altCount;
|
|
445
|
+
}
|
|
446
|
+
try {
|
|
447
|
+
if (!mayFilterByMaf(options.mafFilter, copy)) return null;
|
|
448
|
+
} catch (e) {
|
|
449
|
+
mayLog("mayFilterByMaf() crashed on a snvindel " + (e instanceof Error ? e.message : String(e)));
|
|
450
|
+
return null;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
434
453
|
const start = entry.pos;
|
|
435
454
|
const end = entry.pos;
|
|
436
455
|
return [sampleName, entry.chr, start, end, dt2lesion[dtsnvindel].lesionTypes[0].lesionType];
|
package/routes/termdb.chat.js
CHANGED
|
@@ -2,6 +2,7 @@ import fs from "fs";
|
|
|
2
2
|
import { ezFetch } from "#shared";
|
|
3
3
|
import { get_samples } from "#src/termdb.sql.js";
|
|
4
4
|
import { ChatPayload } from "#types/checkers";
|
|
5
|
+
import { extractResourceResponse } from "./chat/resource.ts";
|
|
5
6
|
import serverconfig from "../src/serverconfig.js";
|
|
6
7
|
import { mayLog } from "#src/helpers.ts";
|
|
7
8
|
import Database from "better-sqlite3";
|
|
@@ -143,8 +144,15 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
143
144
|
);
|
|
144
145
|
let ai_output_json;
|
|
145
146
|
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
146
|
-
if (class_response.type == "
|
|
147
|
-
ai_output_json =
|
|
147
|
+
if (class_response.type == "none") {
|
|
148
|
+
ai_output_json = {
|
|
149
|
+
type: "text",
|
|
150
|
+
text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
|
|
151
|
+
};
|
|
152
|
+
} else if (class_response.type == "resource") {
|
|
153
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
154
|
+
ai_output_json = await extractResourceResponse(user_prompt, llm, dataset_json);
|
|
155
|
+
mayLog("Time taken for resource agent:", formatElapsedTime(Date.now() - time12));
|
|
148
156
|
} else if (class_response.type == "plot") {
|
|
149
157
|
const classResult = class_response.plot;
|
|
150
158
|
mayLog("classResult:", classResult);
|
|
@@ -174,7 +182,7 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
174
182
|
);
|
|
175
183
|
mayLog("Time taken for DE agent:", formatElapsedTime(Date.now() - time12));
|
|
176
184
|
} else if (classResult == "survival") {
|
|
177
|
-
ai_output_json = { type: "
|
|
185
|
+
ai_output_json = { type: "text", text: "survival agent has not been implemented yet" };
|
|
178
186
|
} else if (classResult == "matrix") {
|
|
179
187
|
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
180
188
|
ai_output_json = await extract_matrix_search_terms_from_query(
|
|
@@ -200,13 +208,10 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
200
208
|
);
|
|
201
209
|
mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
|
|
202
210
|
} else {
|
|
203
|
-
ai_output_json = { type: "
|
|
211
|
+
ai_output_json = { type: "text", text: "Unknown classification value" };
|
|
204
212
|
}
|
|
205
213
|
} else {
|
|
206
|
-
ai_output_json = {
|
|
207
|
-
type: "html",
|
|
208
|
-
html: "Unknown classification type"
|
|
209
|
-
};
|
|
214
|
+
ai_output_json = { type: "text", text: "Unknown classification type" };
|
|
210
215
|
}
|
|
211
216
|
return ai_output_json;
|
|
212
217
|
}
|
|
@@ -870,28 +875,72 @@ function removeLastOccurrence(str, word) {
|
|
|
870
875
|
return str.slice(0, index) + str.slice(index + word.length);
|
|
871
876
|
}
|
|
872
877
|
}
|
|
878
|
+
function sortSameCategoricalFilterKeys(filters, ds) {
|
|
879
|
+
let html = "";
|
|
880
|
+
const keys = filters.map((f) => f.term);
|
|
881
|
+
if (new Set(keys).size == keys.length) return { filters, html };
|
|
882
|
+
const seen = /* @__PURE__ */ new Set();
|
|
883
|
+
const categorical_filter_terms_with_multiple_fields = /* @__PURE__ */ new Set();
|
|
884
|
+
for (const item of filters) {
|
|
885
|
+
if (seen.has(item.term)) categorical_filter_terms_with_multiple_fields.add(item.term);
|
|
886
|
+
else seen.add(item.term);
|
|
887
|
+
}
|
|
888
|
+
const multiple_fields_keys = [];
|
|
889
|
+
for (const key of categorical_filter_terms_with_multiple_fields) {
|
|
890
|
+
const term = ds.cohort.termdb.q.termjsonByOneid(key);
|
|
891
|
+
if (!term) {
|
|
892
|
+
html += "invalid filter id:" + key;
|
|
893
|
+
} else {
|
|
894
|
+
if (term.type == "categorical") {
|
|
895
|
+
const multiple_fields = filters.filter((x) => x.term == key);
|
|
896
|
+
multiple_fields_keys.push({ key, categories: multiple_fields.map((f) => f.category) });
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
const sorted_filter = [];
|
|
901
|
+
const seen2 = /* @__PURE__ */ new Set();
|
|
902
|
+
for (const f of filters) {
|
|
903
|
+
const repeated_term = multiple_fields_keys.find((x) => x.key == f.term);
|
|
904
|
+
if (!repeated_term) {
|
|
905
|
+
sorted_filter.push(f);
|
|
906
|
+
} else {
|
|
907
|
+
if (!seen2.has(f.term)) {
|
|
908
|
+
const new_filter_term = {
|
|
909
|
+
term: f.term,
|
|
910
|
+
category: repeated_term.categories
|
|
911
|
+
};
|
|
912
|
+
seen2.add(f.term);
|
|
913
|
+
sorted_filter.push(new_filter_term);
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
return { filters: sorted_filter, html };
|
|
918
|
+
}
|
|
873
919
|
function validate_filter(filters, ds, group_name) {
|
|
874
920
|
if (!Array.isArray(filters)) throw "filter is not array";
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
921
|
+
const sorted_filters = sortSameCategoricalFilterKeys(filters, ds);
|
|
922
|
+
let filter_result = { html: sorted_filters.html };
|
|
923
|
+
if (sorted_filters.filters.length <= 2) {
|
|
924
|
+
const generated = generate_filter_term(sorted_filters.filters, ds);
|
|
925
|
+
filter_result.simplefilter = generated.simplefilter;
|
|
926
|
+
filter_result.html += generated.html;
|
|
878
927
|
} else {
|
|
879
|
-
if (filters.length > num_filter_cutoff) {
|
|
880
|
-
filter_result.html
|
|
928
|
+
if (sorted_filters.filters.length > num_filter_cutoff) {
|
|
929
|
+
filter_result.html += "For now, the maximum number of filter terms supported through the chatbot is " + num_filter_cutoff;
|
|
881
930
|
if (group_name.length > 0) {
|
|
882
|
-
filter_result.html += " . The number of filter terms for group " + group_name + " is " + filters.length + "\n";
|
|
931
|
+
filter_result.html += " . The number of filter terms for group " + group_name + " is " + sorted_filters.filters.length + "\n";
|
|
883
932
|
} else {
|
|
884
|
-
filter_result.html += "The number of filter terms for this query is " + filters.length;
|
|
933
|
+
filter_result.html += "The number of filter terms for this query is " + sorted_filters.filters.length;
|
|
885
934
|
}
|
|
886
935
|
} else {
|
|
887
|
-
for (let i = 0; i < filters.length - 1; i++) {
|
|
936
|
+
for (let i = 0; i < sorted_filters.filters.length - 1; i++) {
|
|
888
937
|
const filter_lst = [];
|
|
889
938
|
if (i == 0) {
|
|
890
|
-
filter_lst.push(filters[i]);
|
|
939
|
+
filter_lst.push(sorted_filters.filters[i]);
|
|
891
940
|
} else {
|
|
892
941
|
filter_lst.push(filter_result.simplefilter);
|
|
893
942
|
}
|
|
894
|
-
filter_lst.push(filters[i + 1]);
|
|
943
|
+
filter_lst.push(sorted_filters.filters[i + 1]);
|
|
895
944
|
filter_result = generate_filter_term(filter_lst, ds);
|
|
896
945
|
}
|
|
897
946
|
}
|
|
@@ -913,19 +962,35 @@ function generate_filter_term(filters, ds) {
|
|
|
913
962
|
localfilter.join = f.join;
|
|
914
963
|
}
|
|
915
964
|
if (term.type == "categorical") {
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
965
|
+
if (Array.isArray(f.category)) {
|
|
966
|
+
const categories = [];
|
|
967
|
+
for (const category of f.category) {
|
|
968
|
+
const cat = findCategoryKey(term.values, category);
|
|
969
|
+
if (!cat) invalid_html += "invalid category from " + JSON.stringify(f);
|
|
970
|
+
else {
|
|
971
|
+
categories.push({ key: cat });
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
localfilter.lst.push({
|
|
975
|
+
type: "tvs",
|
|
976
|
+
tvs: {
|
|
977
|
+
term,
|
|
978
|
+
values: categories
|
|
979
|
+
}
|
|
980
|
+
});
|
|
981
|
+
} else {
|
|
982
|
+
const cat = findCategoryKey(term.values, f.category);
|
|
983
|
+
if (!cat) invalid_html += "invalid category from " + JSON.stringify(f);
|
|
984
|
+
else {
|
|
985
|
+
localfilter.lst.push({
|
|
986
|
+
type: "tvs",
|
|
987
|
+
tvs: {
|
|
988
|
+
term,
|
|
989
|
+
values: [{ key: cat }]
|
|
990
|
+
}
|
|
991
|
+
});
|
|
927
992
|
}
|
|
928
|
-
}
|
|
993
|
+
}
|
|
929
994
|
} else if (term.type == "float" || term.type == "integer") {
|
|
930
995
|
const numeric = {
|
|
931
996
|
type: "tvs",
|
|
@@ -958,6 +1023,12 @@ function generate_filter_term(filters, ds) {
|
|
|
958
1023
|
}
|
|
959
1024
|
return { simplefilter: localfilter, html: invalid_html };
|
|
960
1025
|
}
|
|
1026
|
+
function findCategoryKey(termValues, category) {
|
|
1027
|
+
for (const ck in termValues) {
|
|
1028
|
+
if (ck === category || termValues[ck].label === category) return ck;
|
|
1029
|
+
}
|
|
1030
|
+
return void 0;
|
|
1031
|
+
}
|
|
961
1032
|
async function parse_geneset_db(genedb) {
|
|
962
1033
|
let genes_list = [];
|
|
963
1034
|
const db = new Database(genedb);
|
|
package/routes/termdb.chat2.js
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { ChatPayload } from "#types/checkers";
|
|
2
|
+
import { classifyQuery } from "./chat/classify1.ts";
|
|
3
|
+
import { classifyNotPlot } from "./chat/classify2.ts";
|
|
4
|
+
import { classifyPlotType } from "./chat/plot.ts";
|
|
5
|
+
import { readJSONFile } from "./chat/utils.ts";
|
|
6
|
+
import { extract_DE_search_terms_from_query } from "./chat/DEagent.ts";
|
|
7
|
+
import { extract_summary_terms } from "./chat/summaryagent.ts";
|
|
8
|
+
import { extract_matrix_search_terms_from_query } from "./chat/matrixagent.ts";
|
|
9
|
+
import { extract_samplescatter_terms_from_query } from "./chat/samplescatteragent.ts";
|
|
10
|
+
import { parse_dataset_db, parse_geneset_db, getGenesetNames } from "./chat/utils.ts";
|
|
11
|
+
import serverconfig from "../src/serverconfig.js";
|
|
12
|
+
import { mayLog } from "#src/helpers.ts";
|
|
13
|
+
import { formatElapsedTime } from "#shared";
|
|
14
|
+
const api = {
|
|
15
|
+
endpoint: "termdb/chat2",
|
|
16
|
+
methods: {
|
|
17
|
+
get: {
|
|
18
|
+
...ChatPayload,
|
|
19
|
+
init
|
|
20
|
+
},
|
|
21
|
+
post: {
|
|
22
|
+
...ChatPayload,
|
|
23
|
+
init
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
function init({ genomes }) {
|
|
28
|
+
return async (req, res) => {
|
|
29
|
+
const q = req.query;
|
|
30
|
+
try {
|
|
31
|
+
const g = genomes[q.genome];
|
|
32
|
+
if (!g) throw "invalid genome";
|
|
33
|
+
const ds = g.datasets?.[q.dslabel];
|
|
34
|
+
if (!ds) throw "invalid dslabel";
|
|
35
|
+
const serverconfig_ds_entries = serverconfig.genomes.find((genome) => genome.name == q.genome).datasets.find((dslabel) => dslabel.name == ds.label);
|
|
36
|
+
if (!serverconfig_ds_entries.aifiles) {
|
|
37
|
+
throw "aifiles are missing for chatbot to work";
|
|
38
|
+
}
|
|
39
|
+
const llm = serverconfig.llm;
|
|
40
|
+
if (!llm) throw "serverconfig.llm is not configured";
|
|
41
|
+
if (llm.provider !== "SJ" && llm.provider !== "ollama") {
|
|
42
|
+
throw "llm.provider must be 'SJ' or 'ollama'";
|
|
43
|
+
}
|
|
44
|
+
const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
|
|
45
|
+
const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
|
|
46
|
+
const aiFilesPath = serverconfig_ds_entries.aifiles;
|
|
47
|
+
const dataset_json = await readJSONFile(aiFilesPath);
|
|
48
|
+
const testing = false;
|
|
49
|
+
const genesetNames = getGenesetNames(g);
|
|
50
|
+
const ai_output_json = await run_chat_pipeline(
|
|
51
|
+
q.prompt,
|
|
52
|
+
llm,
|
|
53
|
+
serverconfig.aiRoute,
|
|
54
|
+
dataset_json,
|
|
55
|
+
testing,
|
|
56
|
+
dataset_db,
|
|
57
|
+
genedb,
|
|
58
|
+
ds,
|
|
59
|
+
genesetNames
|
|
60
|
+
);
|
|
61
|
+
res.send(ai_output_json);
|
|
62
|
+
} catch (e) {
|
|
63
|
+
if (e.stack) mayLog(e.stack);
|
|
64
|
+
res.send({ error: e?.message || e });
|
|
65
|
+
}
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testing, dataset_db, genedb, ds, genesetNames = []) {
|
|
69
|
+
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
70
|
+
const class_response = await classifyQuery(user_prompt, llm);
|
|
71
|
+
let ai_output_json;
|
|
72
|
+
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
73
|
+
if (class_response.type == "notplot") {
|
|
74
|
+
const time2 = (/* @__PURE__ */ new Date()).valueOf();
|
|
75
|
+
const notPlotResult = await classifyNotPlot(user_prompt, llm, dataset_json);
|
|
76
|
+
mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
|
|
77
|
+
if (notPlotResult.type == "html") {
|
|
78
|
+
ai_output_json = notPlotResult;
|
|
79
|
+
} else {
|
|
80
|
+
ai_output_json = {
|
|
81
|
+
type: "text",
|
|
82
|
+
text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
|
|
83
|
+
};
|
|
84
|
+
}
|
|
85
|
+
} else if (class_response.type == "plot") {
|
|
86
|
+
const classResult = await classifyPlotType(user_prompt, llm);
|
|
87
|
+
mayLog("classResult:", classResult);
|
|
88
|
+
const dataset_db_output = await parse_dataset_db(dataset_db);
|
|
89
|
+
const genes_list = dataset_json.hasGeneExpression ? await parse_geneset_db(genedb) : [];
|
|
90
|
+
if (classResult == "summary") {
|
|
91
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
92
|
+
ai_output_json = await extract_summary_terms(
|
|
93
|
+
user_prompt,
|
|
94
|
+
llm,
|
|
95
|
+
dataset_db_output,
|
|
96
|
+
dataset_json,
|
|
97
|
+
genes_list,
|
|
98
|
+
ds,
|
|
99
|
+
testing,
|
|
100
|
+
genesetNames
|
|
101
|
+
);
|
|
102
|
+
mayLog("Time taken for summary agent:", formatElapsedTime(Date.now() - time12));
|
|
103
|
+
} else if (classResult == "dge") {
|
|
104
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
105
|
+
ai_output_json = await extract_DE_search_terms_from_query(
|
|
106
|
+
user_prompt,
|
|
107
|
+
llm,
|
|
108
|
+
dataset_db_output,
|
|
109
|
+
dataset_json,
|
|
110
|
+
ds,
|
|
111
|
+
testing
|
|
112
|
+
);
|
|
113
|
+
mayLog("Time taken for DE agent:", formatElapsedTime(Date.now() - time12));
|
|
114
|
+
} else if (classResult == "survival") {
|
|
115
|
+
ai_output_json = { type: "text", text: "survival agent has not been implemented yet" };
|
|
116
|
+
} else if (classResult == "matrix") {
|
|
117
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
118
|
+
ai_output_json = await extract_matrix_search_terms_from_query(
|
|
119
|
+
user_prompt,
|
|
120
|
+
llm,
|
|
121
|
+
dataset_db_output,
|
|
122
|
+
dataset_json,
|
|
123
|
+
genes_list,
|
|
124
|
+
ds,
|
|
125
|
+
testing,
|
|
126
|
+
genesetNames
|
|
127
|
+
);
|
|
128
|
+
mayLog("Time taken for matrix agent:", formatElapsedTime(Date.now() - time12));
|
|
129
|
+
} else if (classResult == "samplescatter") {
|
|
130
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
131
|
+
ai_output_json = await extract_samplescatter_terms_from_query(
|
|
132
|
+
user_prompt,
|
|
133
|
+
llm,
|
|
134
|
+
dataset_db_output,
|
|
135
|
+
dataset_json,
|
|
136
|
+
genes_list,
|
|
137
|
+
ds,
|
|
138
|
+
testing,
|
|
139
|
+
genesetNames
|
|
140
|
+
);
|
|
141
|
+
mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
|
|
142
|
+
} else {
|
|
143
|
+
ai_output_json = { type: "text", text: "Unknown classification value" };
|
|
144
|
+
}
|
|
145
|
+
} else {
|
|
146
|
+
ai_output_json = { type: "text", text: "Unknown classification type" };
|
|
147
|
+
}
|
|
148
|
+
return ai_output_json;
|
|
149
|
+
}
|
|
150
|
+
export {
|
|
151
|
+
api,
|
|
152
|
+
run_chat_pipeline
|
|
153
|
+
};
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -44,6 +44,13 @@ function init({ genomes }) {
|
|
|
44
44
|
if (q.terms.length < 3)
|
|
45
45
|
throw `A minimum of three genes is required for clustering. Please refresh this page to clear this error.`;
|
|
46
46
|
result = await getResult(q, ds);
|
|
47
|
+
} else if (TermTypes.WHOLE_PROTEOME_ABUNDANCE == q.dataType) {
|
|
48
|
+
if (!ds.queries?.proteome?.whole) throw `no ${TermTypes.WHOLE_PROTEOME_ABUNDANCE} data on this dataset`;
|
|
49
|
+
if (!q.terms) throw `missing gene list`;
|
|
50
|
+
if (!Array.isArray(q.terms)) throw `gene list is not an array`;
|
|
51
|
+
if (q.terms.length < 3)
|
|
52
|
+
throw `A minimum of three genes is required for clustering. Please refresh this page to clear this error.`;
|
|
53
|
+
result = await getResult(q, ds);
|
|
47
54
|
} else {
|
|
48
55
|
throw "unknown q.dataType " + q.dataType;
|
|
49
56
|
}
|
|
@@ -68,6 +75,9 @@ async function getResult(q, ds) {
|
|
|
68
75
|
if (q.dataType == NUMERIC_DICTIONARY_TERM) {
|
|
69
76
|
;
|
|
70
77
|
({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
|
|
78
|
+
} else if (q.dataType == TermTypes.WHOLE_PROTEOME_ABUNDANCE) {
|
|
79
|
+
;
|
|
80
|
+
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.whole.get(_q, ds));
|
|
71
81
|
} else {
|
|
72
82
|
;
|
|
73
83
|
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries[q.dataType].get(_q, ds));
|
package/routes/termdb.config.js
CHANGED
|
@@ -72,7 +72,7 @@ function make(q, req, res, ds, genome) {
|
|
|
72
72
|
if (tdb.survival) c.survival = tdb.survival;
|
|
73
73
|
if (tdb.regression) c.regression = tdb.regression;
|
|
74
74
|
if (tdb.uiLabels) c.uiLabels = tdb.uiLabels;
|
|
75
|
-
if (tdb.
|
|
75
|
+
if (tdb.termCollections) c.termCollections = tdb.termCollections;
|
|
76
76
|
if (ds.assayAvailability) c.assayAvailability = ds.assayAvailability;
|
|
77
77
|
if (ds.cohort.correlationVolcano) c.correlationVolcano = ds.cohort.correlationVolcano;
|
|
78
78
|
if (ds.cohort.boxplots) c.boxplots = ds.cohort.boxplots;
|
|
@@ -124,8 +124,8 @@ function addMatrixplots(c, ds) {
|
|
|
124
124
|
});
|
|
125
125
|
}
|
|
126
126
|
function addMutationSignatureplots(c, ds) {
|
|
127
|
-
const mutationSignatureplots = ds.cohort.termdb.
|
|
128
|
-
(
|
|
127
|
+
const mutationSignatureplots = ds.cohort.termdb.termCollections?.find(
|
|
128
|
+
(tc) => tc.name == "Mutation Signature" && tc.type === "numeric"
|
|
129
129
|
)?.plots;
|
|
130
130
|
if (!mutationSignatureplots) return;
|
|
131
131
|
c.mutationSignatureplots = mutationSignatureplots.map((p) => {
|
|
@@ -189,6 +189,12 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
189
189
|
if (q.geneExpression) {
|
|
190
190
|
q2.geneExpression = { unit: q.geneExpression.unit };
|
|
191
191
|
}
|
|
192
|
+
if (q.dnaMethylation) {
|
|
193
|
+
q2.dnaMethylation = { unit: q.dnaMethylation.unit };
|
|
194
|
+
if (q.dnaMethylation.promoter) {
|
|
195
|
+
q2.dnaMethylation.promoter = { unit: q.dnaMethylation.promoter.unit };
|
|
196
|
+
}
|
|
197
|
+
}
|
|
192
198
|
if (q.ld) {
|
|
193
199
|
q2.ld = structuredClone(q.ld);
|
|
194
200
|
}
|
|
@@ -248,6 +254,9 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
248
254
|
if (q.singleCell.DEgenes) {
|
|
249
255
|
q2.singleCell.DEgenes = { termId: q.singleCell.DEgenes.termId };
|
|
250
256
|
}
|
|
257
|
+
if (q.singleCell?.terms?.length) {
|
|
258
|
+
c.scctTerms = q.singleCell.terms;
|
|
259
|
+
}
|
|
251
260
|
}
|
|
252
261
|
if (q.images) {
|
|
253
262
|
q2.images = {};
|
|
@@ -263,8 +272,14 @@ function getAllowedTermTypes(ds) {
|
|
|
263
272
|
}
|
|
264
273
|
if (ds.queries?.geneExpression) typeSet.add(TermTypes.GENE_EXPRESSION);
|
|
265
274
|
if (ds.queries?.metaboliteIntensity) typeSet.add(TermTypes.METABOLITE_INTENSITY);
|
|
275
|
+
if (ds.queries?.proteome?.whole) typeSet.add(TermTypes.WHOLE_PROTEOME_ABUNDANCE);
|
|
266
276
|
if (ds.queries?.ssGSEA) typeSet.add(TermTypes.SSGSEA);
|
|
267
|
-
if (ds.
|
|
277
|
+
if (ds.queries?.dnaMethylation) typeSet.add(TermTypes.DNA_METHYLATION);
|
|
278
|
+
if (ds.queries?.singleCell) {
|
|
279
|
+
typeSet.add(TermTypes.SINGLECELL_CELLTYPE);
|
|
280
|
+
if (ds.queries.singleCell?.geneExpression) typeSet.add(TermTypes.SINGLECELL_GENE_EXPRESSION);
|
|
281
|
+
}
|
|
282
|
+
if (ds.cohort.termdb.termCollections?.length) typeSet.add("termCollection");
|
|
268
283
|
return [...typeSet];
|
|
269
284
|
}
|
|
270
285
|
function getSelectCohort(ds, req) {
|