@sjcrh/proteinpaint-server 2.189.0 → 2.190.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -12
- package/routes/aiProjectAdmin.js +2 -28
- package/routes/aiProjectSelectedWSImages.js +2 -16
- package/routes/brainImaging.js +1 -15
- package/routes/brainImagingSamples.js +1 -15
- package/routes/burden.js +1 -15
- package/routes/correlationVolcano.js +1 -15
- package/routes/dataset.js +1 -16
- package/routes/deleteWSITileSelection.js +2 -12
- package/routes/dsdata.js +1 -19
- package/routes/gdc.grin2.list.js +1 -15
- package/routes/gdc.grin2.run.js +1 -15
- package/routes/gdc.maf.js +1 -15
- package/routes/gdc.mafBuild.js +1 -15
- package/routes/genesetEnrichment.js +129 -97
- package/routes/grin2.js +110 -79
- package/routes/saveWSIAnnotation.js +2 -13
- package/routes/termdb.DE.js +137 -54
- package/routes/termdb.categories.js +2 -16
- package/routes/termdb.chat.js +169 -1076
- package/routes/termdb.cluster.js +5 -16
- package/routes/termdb.config.js +12 -17
- package/routes/termdb.descrstats.js +2 -16
- package/routes/termdb.diffMeth.js +100 -21
- package/routes/termdb.geneRanking.js +139 -0
- package/routes/termdb.proteome.js +1 -15
- package/routes/termdb.runChart.js +16 -30
- package/routes/termdb.sampleScatter.js +7 -97
- package/routes/termdb.singleCellPlots.js +159 -0
- package/routes/termdb.singlecellSamples.js +6 -16
- package/routes/termdb.violinBox.js +1 -15
- package/routes/wsimages.js +1 -16
- package/src/app.js +4028 -4116
- package/routes/_template_.js +0 -33
- package/routes/aiProjectTrainModel.js +0 -68
- package/routes/alphaGenome.js +0 -41
- package/routes/alphaGenomeTypes.js +0 -36
- package/routes/dzimages.js +0 -55
- package/routes/gene2canonicalisoform.js +0 -37
- package/routes/genelookup.js +0 -32
- package/routes/genesetOverrepresentation.js +0 -49
- package/routes/genomes.js +0 -150
- package/routes/healthcheck.js +0 -35
- package/routes/hicdata.js +0 -74
- package/routes/hicgenome.js +0 -75
- package/routes/hicstat.js +0 -35
- package/routes/img.js +0 -46
- package/routes/isoformlst.js +0 -48
- package/routes/ntseq.js +0 -36
- package/routes/pdomain.js +0 -53
- package/routes/profile.barchart2.js +0 -114
- package/routes/profile.forms2.js +0 -107
- package/routes/profile.polar2.js +0 -101
- package/routes/profile.radar2.js +0 -112
- package/routes/profile.radarFacility2.js +0 -148
- package/routes/sampledzimages.js +0 -48
- package/routes/samplewsimages.js +0 -60
- package/routes/snp.js +0 -98
- package/routes/termdb.chat2.js +0 -217
- package/routes/termdb.chat3.js +0 -209
- package/routes/termdb.cohort.summary.js +0 -37
- package/routes/termdb.cohorts.js +0 -41
- package/routes/termdb.dapVolcano.js +0 -80
- package/routes/termdb.dmr.js +0 -93
- package/routes/termdb.filterTermValues.js +0 -89
- package/routes/termdb.isoformAvailability.js +0 -35
- package/routes/termdb.numericcategories.js +0 -46
- package/routes/termdb.percentile.js +0 -66
- package/routes/termdb.profileFormScores.js +0 -92
- package/routes/termdb.profileScores.js +0 -113
- package/routes/termdb.rootterm.js +0 -39
- package/routes/termdb.sampleImages.js +0 -63
- package/routes/termdb.singleSampleMutation.js +0 -75
- package/routes/termdb.singlecellDEgenes.js +0 -55
- package/routes/termdb.singlecellData.js +0 -39
- package/routes/termdb.termchildren.js +0 -42
- package/routes/termdb.termsbyids.js +0 -50
- package/routes/termdb.topMutatedGenes.js +0 -127
- package/routes/termdb.topTermsByType.js +0 -96
- package/routes/termdb.topVariablyExpressedGenes.js +0 -132
- package/routes/tileserver.js +0 -68
- package/routes/wsisamples.js +0 -71
package/routes/termdb.chat.js
CHANGED
|
@@ -1,132 +1,78 @@
|
|
|
1
|
-
import fs from "fs";
|
|
2
|
-
import { ezFetch } from "#shared";
|
|
3
|
-
import { get_samples } from "#src/termdb.sql.js";
|
|
4
|
-
import { ChatPayload } from "#types/checkers";
|
|
5
|
-
import { extractResourceResponse } from "./chat/resource.ts";
|
|
6
|
-
import serverconfig from "../src/serverconfig.js";
|
|
7
1
|
import { mayLog } from "#src/helpers.ts";
|
|
8
|
-
import Database from "better-sqlite3";
|
|
9
2
|
import { formatElapsedTime } from "#shared";
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}
|
|
25
|
-
},
|
|
26
|
-
required: ["term", "category"],
|
|
27
|
-
additionalProperties: false
|
|
28
|
-
},
|
|
29
|
-
NumericFilterTerm: {
|
|
30
|
-
type: "object",
|
|
31
|
-
properties: {
|
|
32
|
-
term: { type: "string", description: "Name of numeric term" },
|
|
33
|
-
start: { type: "number", description: "start position (or lower limit) of numeric term" },
|
|
34
|
-
stop: { type: "number", description: "stop position (or upper limit) of numeric term" },
|
|
35
|
-
join: {
|
|
36
|
-
type: "string",
|
|
37
|
-
enum: ["and", "or"],
|
|
38
|
-
description: "join term to be used only when there is more than one filter term and should be placed from the 2nd filter term onwards describing how it connects to the previous term"
|
|
39
|
-
}
|
|
40
|
-
},
|
|
41
|
-
required: ["term"],
|
|
42
|
-
additionalProperties: false
|
|
43
|
-
}
|
|
44
|
-
};
|
|
45
|
-
function formatTrainingExamples(trainingData) {
|
|
46
|
-
return trainingData.map(
|
|
47
|
-
(td, i) => "Example question" + (i + 1).toString() + ": " + td.question + " Example answer" + (i + 1).toString() + ":" + JSON.stringify(td.answer)
|
|
48
|
-
).join(" ");
|
|
49
|
-
}
|
|
50
|
-
const FILTER_DESCRIPTION = 'There are two kinds of filter variables: "Categorical" and "Numeric". "Categorical" variables are those variables which can have a fixed set of values e.g. gender, race. They are defined by the "CategoricalFilterTerm" which consists of "term" (a field from the sqlite3 db) and "category" (a value of the field from the sqlite db). "Numeric" variables are those which can have any numeric value. They are defined by "NumericFilterTerm" and contain the subfields "term" (a field from the sqlite3 db), "start" an optional filter which is defined when a lower cutoff is defined in the user input for the numeric variable and "stop" an optional filter which is defined when a higher cutoff is defined in the user input for the numeric variable. ';
|
|
51
|
-
function extractGenesFromPrompt(prompt, genes_list) {
|
|
52
|
-
const words = prompt.replace(/[^a-zA-Z0-9\s]/g, "").split(/\s+/).map((str) => str.toLowerCase());
|
|
53
|
-
return words.filter((item) => genes_list.includes(item));
|
|
54
|
-
}
|
|
55
|
-
const CHILD_TYPE_DEFAULTS = {
|
|
56
|
-
"categorical:undefined": "barchart",
|
|
57
|
-
"numeric:undefined": "violin",
|
|
58
|
-
"categorical:categorical": "barchart",
|
|
59
|
-
"numeric:categorical": "violin",
|
|
60
|
-
"categorical:numeric": "violin",
|
|
61
|
-
"numeric:numeric": "sampleScatter"
|
|
62
|
-
};
|
|
63
|
-
const CHILD_TYPE_INVALID = {
|
|
64
|
-
"categorical:undefined": /* @__PURE__ */ new Set(["violin", "boxplot", "sampleScatter"]),
|
|
65
|
-
"categorical:categorical": /* @__PURE__ */ new Set(["violin", "boxplot", "sampleScatter"])
|
|
66
|
-
};
|
|
67
|
-
function resolveChildType(cat1, cat2, llmChildType) {
|
|
68
|
-
const norm1 = cat1 == "float" || cat1 == "integer" ? "numeric" : cat1 || "undefined";
|
|
69
|
-
const norm2 = cat2 == "float" || cat2 == "integer" ? "numeric" : cat2 || "undefined";
|
|
70
|
-
const key = norm1 + ":" + norm2;
|
|
71
|
-
const defaultType = CHILD_TYPE_DEFAULTS[key];
|
|
72
|
-
if (!defaultType) {
|
|
73
|
-
return { childType: "barchart" };
|
|
74
|
-
}
|
|
75
|
-
const invalid = CHILD_TYPE_INVALID[key];
|
|
76
|
-
if (llmChildType && invalid && invalid.has(llmChildType)) {
|
|
77
|
-
return {
|
|
78
|
-
error: "Invalid plot type supplied by the user: " + llmChildType + ". For " + key.replace(":", " and ") + " variables the plot type should always be " + defaultType
|
|
79
|
-
};
|
|
80
|
-
}
|
|
81
|
-
return {
|
|
82
|
-
childType: llmChildType || defaultType,
|
|
83
|
-
bothNumeric: norm1 == "numeric" && norm2 == "numeric"
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
const api = {
|
|
87
|
-
endpoint: "termdb/chat",
|
|
88
|
-
methods: {
|
|
89
|
-
get: {
|
|
90
|
-
...ChatPayload,
|
|
91
|
-
init
|
|
92
|
-
},
|
|
93
|
-
post: {
|
|
94
|
-
...ChatPayload,
|
|
95
|
-
init
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
};
|
|
3
|
+
import { readJSONFile, parse_geneset_db, getChatRelatedPlotTypes } from "./chat/utils.ts";
|
|
4
|
+
import { classifyQuery } from "./chat/classify1.ts";
|
|
5
|
+
import { classifyPlotType } from "./chat/plot.ts";
|
|
6
|
+
import { classifyNotPlot } from "./chat/classify2.ts";
|
|
7
|
+
import { inferScaffold } from "./chat/scaffold.ts";
|
|
8
|
+
import serverconfig from "../src/serverconfig.js";
|
|
9
|
+
import { getDsAllowedTermTypes } from "./termdb.config.ts";
|
|
10
|
+
import { phrase2entity } from "./chat/phrase2entity.ts";
|
|
11
|
+
import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
|
|
12
|
+
import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
|
|
13
|
+
import { answerDataQueries } from "./chat/dataQueries.ts";
|
|
14
|
+
import path from "path";
|
|
15
|
+
import fs from "fs";
|
|
16
|
+
import { resolveToPlotState } from "./chat/scaffold2state.ts";
|
|
99
17
|
function init({ genomes }) {
|
|
100
18
|
return async (req, res) => {
|
|
101
19
|
const q = req.query;
|
|
102
20
|
try {
|
|
103
|
-
const
|
|
104
|
-
if (!
|
|
105
|
-
const ds =
|
|
21
|
+
const genome = genomes[q.genome];
|
|
22
|
+
if (!genome) throw "invalid genome";
|
|
23
|
+
const ds = genome.datasets?.[q.dslabel];
|
|
106
24
|
if (!ds) throw "invalid dslabel";
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
25
|
+
if (!ds.queries.chat) {
|
|
26
|
+
return res.send({
|
|
27
|
+
type: "text",
|
|
28
|
+
text: "Only search functionality supported for this data. No chat functionality supported."
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
const aiFilesDir = serverconfig.binpath + "/../../dataset/ai/" + q.dslabel;
|
|
32
|
+
let agentFiles = [];
|
|
33
|
+
try {
|
|
34
|
+
agentFiles = await fs.readdirSync(aiFilesDir).filter((file) => file.endsWith(".json"));
|
|
35
|
+
} catch (err) {
|
|
36
|
+
if (err.code === "ENOENT") throw new Error(`Directory not found: ${aiFilesDir}`);
|
|
37
|
+
if (err.code === "ENOTDIR") throw new Error(`Path is not a directory: ${aiFilesDir}`);
|
|
38
|
+
throw err;
|
|
110
39
|
}
|
|
111
40
|
const llm = serverconfig.llm;
|
|
112
41
|
if (!llm) throw "serverconfig.llm is not configured";
|
|
113
|
-
if (llm.provider !== "SJ" && llm.provider !== "ollama") {
|
|
114
|
-
throw "llm.provider must be 'SJ' or '
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
42
|
+
if (llm.provider !== "SJ" && llm.provider !== "ollama" && llm.provider !== "huggingface" && llm.provider !== "azure") {
|
|
43
|
+
throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
|
|
44
|
+
}
|
|
45
|
+
let rawFilter;
|
|
46
|
+
if (typeof q.filter === "string") {
|
|
47
|
+
try {
|
|
48
|
+
rawFilter = JSON.parse(q.filter);
|
|
49
|
+
} catch (e) {
|
|
50
|
+
throw new Error("Failed to parse filter JSON string: " + e);
|
|
51
|
+
}
|
|
52
|
+
} else {
|
|
53
|
+
rawFilter = q.filter;
|
|
54
|
+
}
|
|
55
|
+
const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
|
|
56
|
+
const lst = Array.isArray(filter.lst) ? filter.lst : [];
|
|
57
|
+
const cohortFilter = lst.find((item) => item.tag === "cohortFilter");
|
|
58
|
+
const cohortKey = cohortFilter ? cohortFilter.tvs.values[0].key : "";
|
|
59
|
+
const supportedPlotTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
|
|
60
|
+
const chatSupportedPlotTypes = getChatRelatedPlotTypes(supportedPlotTypes);
|
|
61
|
+
const genedb = serverconfig.tpmasterdir + "/" + genome.genedb.dbfile;
|
|
62
|
+
const allowedTermTypes = getDsAllowedTermTypes(ds);
|
|
120
63
|
const ai_output_json = await run_chat_pipeline(
|
|
121
64
|
q.prompt,
|
|
122
65
|
llm,
|
|
123
|
-
|
|
124
|
-
dataset_json,
|
|
125
|
-
testing,
|
|
126
|
-
dataset_db,
|
|
66
|
+
ds,
|
|
127
67
|
genedb,
|
|
128
|
-
|
|
68
|
+
agentFiles,
|
|
69
|
+
aiFilesDir,
|
|
70
|
+
chatSupportedPlotTypes,
|
|
71
|
+
allowedTermTypes,
|
|
72
|
+
genome
|
|
73
|
+
// testing
|
|
129
74
|
);
|
|
75
|
+
mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
|
|
130
76
|
res.send(ai_output_json);
|
|
131
77
|
} catch (e) {
|
|
132
78
|
if (e.stack) mayLog(e.stack);
|
|
@@ -134,977 +80,124 @@ function init({ genomes }) {
|
|
|
134
80
|
}
|
|
135
81
|
};
|
|
136
82
|
}
|
|
137
|
-
async function run_chat_pipeline(
|
|
83
|
+
async function run_chat_pipeline(userPrompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedPlotTypes, allowedTermTypes, genome) {
|
|
84
|
+
if (!fs.existsSync(path.join(aiFilesDir, "main.json")))
|
|
85
|
+
throw "Main data file is not specified for dataset:" + ds.label;
|
|
86
|
+
const dataset_json = await readJSONFile(path.join(aiFilesDir, "main.json"));
|
|
138
87
|
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
139
|
-
const class_response = await
|
|
140
|
-
user_prompt,
|
|
141
|
-
llm,
|
|
142
|
-
aiRoute,
|
|
143
|
-
dataset_json
|
|
144
|
-
);
|
|
145
|
-
let ai_output_json;
|
|
88
|
+
const class_response = await classifyQuery(userPrompt, llm);
|
|
146
89
|
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
mayLog("classResult:", classResult);
|
|
159
|
-
const dataset_db_output = await parse_dataset_db(dataset_db);
|
|
160
|
-
const genes_list = dataset_json.hasGeneExpression ? await parse_geneset_db(genedb) : [];
|
|
161
|
-
if (classResult == "summary") {
|
|
162
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
163
|
-
ai_output_json = await extract_summary_terms(
|
|
164
|
-
user_prompt,
|
|
165
|
-
llm,
|
|
166
|
-
dataset_db_output,
|
|
167
|
-
dataset_json,
|
|
168
|
-
genes_list,
|
|
169
|
-
ds,
|
|
170
|
-
testing
|
|
171
|
-
);
|
|
172
|
-
mayLog("Time taken for summary agent:", formatElapsedTime(Date.now() - time12));
|
|
173
|
-
} else if (classResult == "dge") {
|
|
174
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
175
|
-
ai_output_json = await extract_DE_search_terms_from_query(
|
|
176
|
-
user_prompt,
|
|
177
|
-
llm,
|
|
178
|
-
dataset_db_output,
|
|
179
|
-
dataset_json,
|
|
180
|
-
ds,
|
|
181
|
-
testing
|
|
182
|
-
);
|
|
183
|
-
mayLog("Time taken for DE agent:", formatElapsedTime(Date.now() - time12));
|
|
184
|
-
} else if (classResult == "survival") {
|
|
185
|
-
ai_output_json = { type: "text", text: "survival agent has not been implemented yet" };
|
|
186
|
-
} else if (classResult == "matrix") {
|
|
187
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
188
|
-
ai_output_json = await extract_matrix_search_terms_from_query(
|
|
189
|
-
user_prompt,
|
|
190
|
-
llm,
|
|
191
|
-
dataset_db_output,
|
|
192
|
-
dataset_json,
|
|
193
|
-
genes_list,
|
|
194
|
-
ds,
|
|
195
|
-
testing
|
|
196
|
-
);
|
|
197
|
-
mayLog("Time taken for matrix agent:", formatElapsedTime(Date.now() - time12));
|
|
198
|
-
} else if (classResult == "sampleScatter") {
|
|
199
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
200
|
-
ai_output_json = await extract_samplescatter_terms_from_query(
|
|
201
|
-
user_prompt,
|
|
202
|
-
llm,
|
|
203
|
-
dataset_db_output,
|
|
204
|
-
dataset_json,
|
|
205
|
-
genes_list,
|
|
206
|
-
ds,
|
|
207
|
-
testing
|
|
208
|
-
);
|
|
209
|
-
mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
|
|
210
|
-
} else {
|
|
211
|
-
ai_output_json = { type: "text", text: "Unknown classification value" };
|
|
212
|
-
}
|
|
213
|
-
} else {
|
|
214
|
-
ai_output_json = { type: "text", text: "Unknown classification type" };
|
|
215
|
-
}
|
|
216
|
-
return ai_output_json;
|
|
217
|
-
}
|
|
218
|
-
async function call_ollama(prompt, model_name, apilink) {
|
|
219
|
-
const temperature = 0.01;
|
|
220
|
-
const top_p = 0.95;
|
|
221
|
-
const timeout = 2e5;
|
|
222
|
-
const payload = {
|
|
223
|
-
model: model_name,
|
|
224
|
-
messages: [{ role: "user", content: prompt }],
|
|
225
|
-
raw: false,
|
|
226
|
-
stream: false,
|
|
227
|
-
keep_alive: 15,
|
|
228
|
-
//Keep the LLM loaded for 15mins
|
|
229
|
-
options: {
|
|
230
|
-
top_p,
|
|
231
|
-
temperature,
|
|
232
|
-
num_ctx: 1e4
|
|
233
|
-
}
|
|
234
|
-
};
|
|
235
|
-
try {
|
|
236
|
-
const result = await ezFetch(apilink + "/api/chat", {
|
|
237
|
-
method: "POST",
|
|
238
|
-
body: payload,
|
|
239
|
-
// ezfetch automatically stringifies objects
|
|
240
|
-
headers: { "Content-Type": "application/json" },
|
|
241
|
-
timeout: { request: timeout }
|
|
242
|
-
// ezfetch accepts milliseconds directly
|
|
243
|
-
});
|
|
244
|
-
if (result && result.message && result.message.content && result.message.content.length > 0)
|
|
245
|
-
return result.message.content;
|
|
246
|
-
else {
|
|
247
|
-
throw "Error: Received an unexpected response format:" + result;
|
|
248
|
-
}
|
|
249
|
-
} catch (error) {
|
|
250
|
-
throw "Ollama API request failed:" + error;
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
async function call_sj_llm(prompt, model_name, apilink) {
|
|
254
|
-
const temperature = 0.01;
|
|
255
|
-
const top_p = 0.95;
|
|
256
|
-
const timeout = 2e5;
|
|
257
|
-
const max_new_tokens = 512;
|
|
258
|
-
const payload = {
|
|
259
|
-
inputs: [
|
|
260
|
-
{
|
|
261
|
-
model_name,
|
|
262
|
-
inputs: {
|
|
263
|
-
text: prompt,
|
|
264
|
-
max_new_tokens,
|
|
265
|
-
temperature,
|
|
266
|
-
top_p
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
]
|
|
270
|
-
};
|
|
271
|
-
try {
|
|
272
|
-
const response = await ezFetch(apilink, {
|
|
273
|
-
method: "POST",
|
|
274
|
-
body: payload,
|
|
275
|
-
// ezfetch automatically stringifies objects
|
|
276
|
-
headers: { "Content-Type": "application/json" },
|
|
277
|
-
timeout: { request: timeout }
|
|
278
|
-
// ezfetch accepts milliseconds directly
|
|
279
|
-
});
|
|
280
|
-
if (response.outputs && response.outputs[0] && response.outputs[0].generated_text) {
|
|
281
|
-
const result = response.outputs[0].generated_text;
|
|
282
|
-
return result;
|
|
283
|
-
} else {
|
|
284
|
-
throw "Error: Received an unexpected response format:" + response;
|
|
285
|
-
}
|
|
286
|
-
} catch (error) {
|
|
287
|
-
throw "SJ API request failed:" + error;
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
async function route_to_appropriate_llm_provider(template, llm) {
|
|
291
|
-
let response;
|
|
292
|
-
if (llm.provider == "SJ") {
|
|
293
|
-
response = await call_sj_llm(template, llm.modelName, llm.api);
|
|
294
|
-
} else if (llm.provider == "ollama") {
|
|
295
|
-
response = await call_ollama(template, llm.modelName, llm.api);
|
|
296
|
-
} else {
|
|
297
|
-
throw "Unknown LLM provider";
|
|
298
|
-
}
|
|
299
|
-
return response;
|
|
300
|
-
}
|
|
301
|
-
function checkField(sentence) {
|
|
302
|
-
if (!sentence) return "";
|
|
303
|
-
else return sentence;
|
|
304
|
-
}
|
|
305
|
-
async function readJSONFile(file) {
|
|
306
|
-
const json_file = await fs.promises.readFile(file);
|
|
307
|
-
return JSON.parse(json_file.toString());
|
|
308
|
-
}
|
|
309
|
-
async function classify_query_by_dataset_type(user_prompt, llm, aiRoute, dataset_json) {
|
|
310
|
-
const data = await readJSONFile(aiRoute);
|
|
311
|
-
let contents = data["general"];
|
|
312
|
-
for (const key of Object.keys(data)) {
|
|
313
|
-
if (key != "general") {
|
|
314
|
-
contents += data[key];
|
|
315
|
-
}
|
|
316
|
-
}
|
|
317
|
-
const classification_ds = dataset_json.charts.find((chart) => chart.type == "Classification");
|
|
318
|
-
if (!classification_ds) throw "Classification information is not present in the dataset file.";
|
|
319
|
-
if (classification_ds.TrainingData.length == 0) throw "No training data is provided for the classification agent.";
|
|
320
|
-
let training_data = "";
|
|
321
|
-
if (classification_ds && classification_ds.TrainingData.length > 0) {
|
|
322
|
-
contents += checkField(dataset_json.DatasetPrompt) + checkField(classification_ds.SystemPrompt);
|
|
323
|
-
training_data = formatTrainingExamples(classification_ds.TrainingData);
|
|
324
|
-
}
|
|
325
|
-
const template = contents + " training data is as follows:" + training_data + " Question: {" + user_prompt + "} Answer: {answer}";
|
|
326
|
-
const response = await route_to_appropriate_llm_provider(template, llm);
|
|
327
|
-
return JSON.parse(response);
|
|
328
|
-
}
|
|
329
|
-
async function extract_DE_search_terms_from_query(prompt, llm, dataset_db_output, dataset_json, ds, testing) {
|
|
330
|
-
if (dataset_json.hasDE) {
|
|
331
|
-
const Schema = {
|
|
332
|
-
$schema: "http://json-schema.org/draft-07/schema#",
|
|
333
|
-
$ref: "#/definitions/DEType",
|
|
334
|
-
definitions: {
|
|
335
|
-
DEType: {
|
|
336
|
-
type: "object",
|
|
337
|
-
properties: {
|
|
338
|
-
group1: {
|
|
339
|
-
type: "array",
|
|
340
|
-
items: { $ref: "#/definitions/FilterTerm" },
|
|
341
|
-
description: "Name of group1 which is an array of filter terms"
|
|
342
|
-
},
|
|
343
|
-
group2: {
|
|
344
|
-
type: "array",
|
|
345
|
-
items: { $ref: "#/definitions/FilterTerm" },
|
|
346
|
-
description: "Name of group2 which is an array of filter terms"
|
|
347
|
-
},
|
|
348
|
-
method: {
|
|
349
|
-
type: "string",
|
|
350
|
-
enum: ["edgeR", "limma", "wilcoxon"],
|
|
351
|
-
description: "Method used for carrying out differential gene expression analysis"
|
|
352
|
-
}
|
|
353
|
-
},
|
|
354
|
-
required: ["group1", "group2"],
|
|
355
|
-
additionalProperties: false
|
|
356
|
-
},
|
|
357
|
-
...FILTER_TERM_DEFINITIONS
|
|
358
|
-
}
|
|
359
|
-
};
|
|
360
|
-
const DE_ds = dataset_json.charts.find((chart) => chart.type == "DE");
|
|
361
|
-
if (!DE_ds) throw "DE information is not present in the dataset file.";
|
|
362
|
-
if (DE_ds.TrainingData.length == 0) throw "No training data is provided for the DE agent.";
|
|
363
|
-
const training_data = formatTrainingExamples(DE_ds.TrainingData);
|
|
364
|
-
const system_prompt = "I am an assistant that extracts the groups from the user prompt to carry out differential gene expression. The final output must be in the following JSON with NO extra comments. The schema is as follows: " + JSON.stringify(Schema) + ' . "group1" and "group2" fields are compulsory. Both "group1" and "group2" consist of an array of filter variables. ' + FILTER_DESCRIPTION + checkField(dataset_json.DatasetPrompt) + checkField(DE_ds.SystemPrompt) + "The sqlite db in plain language is as follows:\n" + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data + " Question: {" + prompt + "} answer:";
|
|
365
|
-
const response = await route_to_appropriate_llm_provider(system_prompt, llm);
|
|
366
|
-
if (testing) {
|
|
367
|
-
return { action: "dge", response: JSON.parse(response) };
|
|
368
|
-
} else {
|
|
369
|
-
return await validate_DE_response(response, ds, dataset_db_output.db_rows);
|
|
370
|
-
}
|
|
371
|
-
} else {
|
|
372
|
-
return { type: "html", html: "Differential gene expression not supported for this dataset" };
|
|
373
|
-
}
|
|
374
|
-
}
|
|
375
|
-
async function validate_DE_response(response, ds, db_rows) {
|
|
376
|
-
const response_type = JSON.parse(response);
|
|
377
|
-
let html = "";
|
|
378
|
-
let group1;
|
|
379
|
-
let samples1lst;
|
|
380
|
-
const name1 = generate_group_name(response_type.group1, db_rows);
|
|
381
|
-
if (!response_type.group1) {
|
|
382
|
-
html += "group1 not present in DE output";
|
|
383
|
-
} else {
|
|
384
|
-
const validated_filters = validate_filter(response_type.group1, ds, name1);
|
|
385
|
-
if (validated_filters.html.length > 0) {
|
|
386
|
-
html += validated_filters.html;
|
|
387
|
-
} else {
|
|
388
|
-
const samples1 = await get_samples({ filter: validated_filters.simplefilter }, ds, true);
|
|
389
|
-
samples1lst = samples1.map((item) => ({
|
|
390
|
-
sampleId: item.id,
|
|
391
|
-
sample: item.name
|
|
392
|
-
}));
|
|
393
|
-
group1 = {
|
|
394
|
-
name: name1,
|
|
395
|
-
in: true,
|
|
396
|
-
values: samples1lst
|
|
90
|
+
let ai_output_json;
|
|
91
|
+
if (class_response.type === "notplot") {
|
|
92
|
+
const time2 = (/* @__PURE__ */ new Date()).valueOf();
|
|
93
|
+
const notPlotResult = await classifyNotPlot(userPrompt, llm, agentFiles, aiFilesDir);
|
|
94
|
+
mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
|
|
95
|
+
if (notPlotResult.type === "html") {
|
|
96
|
+
ai_output_json = notPlotResult;
|
|
97
|
+
} else {
|
|
98
|
+
ai_output_json = {
|
|
99
|
+
type: "text",
|
|
100
|
+
text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
|
|
397
101
|
};
|
|
398
102
|
}
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
const
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
sample: item.name
|
|
414
|
-
}));
|
|
415
|
-
group2 = {
|
|
416
|
-
name: name2,
|
|
417
|
-
in: true,
|
|
418
|
-
values: samples2lst
|
|
103
|
+
} else if (class_response.type === "binaryQuery") {
|
|
104
|
+
const answer = await answerDataQueries(userPrompt, llm, allowedTermTypes);
|
|
105
|
+
if (!answer) throw "Couldn't decide if this is data related query!";
|
|
106
|
+
mayLog("Data Binary Query: ", answer);
|
|
107
|
+
ai_output_json = answer;
|
|
108
|
+
} else if (class_response.type === "plot") {
|
|
109
|
+
let time = (/* @__PURE__ */ new Date()).valueOf();
|
|
110
|
+
const plotType = await classifyPlotType(userPrompt, llm);
|
|
111
|
+
mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));
|
|
112
|
+
if (!supportedPlotTypes.includes(plotType)) {
|
|
113
|
+
const log = 'Plot type: "' + plotType + '" is not supported.';
|
|
114
|
+
ai_output_json = {
|
|
115
|
+
type: "text",
|
|
116
|
+
text: log
|
|
419
117
|
};
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
118
|
+
mayLog(log);
|
|
119
|
+
return ai_output_json;
|
|
120
|
+
}
|
|
121
|
+
const genes_list = await parse_geneset_db(genedb);
|
|
122
|
+
mayLog("#################################################");
|
|
123
|
+
mayLog("####### First phase: Infer Plot Scaffolds #######");
|
|
124
|
+
mayLog("#################################################");
|
|
125
|
+
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
126
|
+
const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
|
|
127
|
+
const scaffoldResult = await inferScaffold(
|
|
128
|
+
userPrompt,
|
|
129
|
+
plotType,
|
|
130
|
+
llm,
|
|
131
|
+
genome,
|
|
132
|
+
genes_list,
|
|
133
|
+
allowedTermTypes,
|
|
134
|
+
dataset_json,
|
|
135
|
+
ds,
|
|
136
|
+
dataset_db
|
|
435
137
|
);
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
}
|
|
462
|
-
};
|
|
463
|
-
pp_plot_json.state = {
|
|
464
|
-
customTerms: [
|
|
465
|
-
{
|
|
466
|
-
name: name1 + " vs " + name2,
|
|
467
|
-
tw
|
|
468
|
-
}
|
|
469
|
-
],
|
|
470
|
-
groups
|
|
471
|
-
};
|
|
472
|
-
pp_plot_json.samplelst = { groups };
|
|
473
|
-
pp_plot_json.tw = tw;
|
|
474
|
-
pp_plot_json.settings = settings;
|
|
475
|
-
return { type: "plot", plot: pp_plot_json };
|
|
476
|
-
}
|
|
477
|
-
}
|
|
478
|
-
function generate_group_name(filters, db_rows) {
|
|
479
|
-
let name = "";
|
|
480
|
-
let iter = 0;
|
|
481
|
-
for (const filter of filters) {
|
|
482
|
-
if (iter > 0 && !filter.join) {
|
|
483
|
-
name += "&";
|
|
484
|
-
}
|
|
485
|
-
if (filter.join && filter.join == "and") {
|
|
486
|
-
name += "&";
|
|
487
|
-
}
|
|
488
|
-
if (filter.join && filter.join == "or") {
|
|
489
|
-
name += "|";
|
|
490
|
-
}
|
|
491
|
-
if (filter.category) {
|
|
492
|
-
name += find_label(filter, db_rows);
|
|
493
|
-
}
|
|
494
|
-
if (filter.start) {
|
|
495
|
-
name += filter.term + ">=" + filter.start.toString();
|
|
496
|
-
}
|
|
497
|
-
if (filter.stop) {
|
|
498
|
-
name += filter.term + "<=" + filter.stop.toString();
|
|
499
|
-
}
|
|
500
|
-
iter += 1;
|
|
501
|
-
}
|
|
502
|
-
return name;
|
|
503
|
-
}
|
|
504
|
-
function find_label(filter, db_rows) {
|
|
505
|
-
let label = "";
|
|
506
|
-
for (const row of db_rows) {
|
|
507
|
-
if (row.name == filter.term) {
|
|
508
|
-
for (const value of row.values) {
|
|
509
|
-
if (value.value && value.value.label && filter.category == value.key) {
|
|
510
|
-
label = value.value.label;
|
|
511
|
-
break;
|
|
512
|
-
}
|
|
513
|
-
}
|
|
514
|
-
break;
|
|
515
|
-
}
|
|
516
|
-
}
|
|
517
|
-
return label;
|
|
518
|
-
}
|
|
519
|
-
async function extract_summary_terms(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
|
|
520
|
-
const Schema = {
|
|
521
|
-
$schema: "http://json-schema.org/draft-07/schema#",
|
|
522
|
-
$ref: "#/definitions/SummaryType",
|
|
523
|
-
definitions: {
|
|
524
|
-
SummaryType: {
|
|
525
|
-
type: "object",
|
|
526
|
-
properties: {
|
|
527
|
-
term: { type: "string", description: "Name of 1st term" },
|
|
528
|
-
term2: { type: "string", description: "Name of 2nd term" },
|
|
529
|
-
simpleFilter: {
|
|
530
|
-
type: "array",
|
|
531
|
-
items: { $ref: "#/definitions/FilterTerm" },
|
|
532
|
-
description: "Optional simple filter terms"
|
|
533
|
-
},
|
|
534
|
-
childType: {
|
|
535
|
-
type: "string",
|
|
536
|
-
enum: ["violin", "boxplot", "sampleScatter", "barchart"],
|
|
537
|
-
description: "Optional explicit child type requested by the user. If omitted, the logic of the data types picks the child type."
|
|
538
|
-
}
|
|
539
|
-
},
|
|
540
|
-
required: ["term", "simpleFilter"],
|
|
541
|
-
additionalProperties: false
|
|
542
|
-
},
|
|
543
|
-
...FILTER_TERM_DEFINITIONS
|
|
544
|
-
}
|
|
545
|
-
};
|
|
546
|
-
const common_genes = extractGenesFromPrompt(prompt, genes_list);
|
|
547
|
-
const summary_ds = dataset_json.charts.find((chart) => chart.type == "Summary");
|
|
548
|
-
if (!summary_ds) throw "Summary information is not present in the dataset file.";
|
|
549
|
-
if (summary_ds.TrainingData.length == 0) throw "No training data is provided for the summary agent.";
|
|
550
|
-
const training_data = formatTrainingExamples(summary_ds.TrainingData);
|
|
551
|
-
let system_prompt = "I am an assistant that extracts the summary terms from user query. The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " + JSON.stringify(Schema) + ' term and term2 (if present) should ONLY contain names of the fields from the sqlite db. The "simpleFilter" field is optional and should contain an array of JSON terms with which the dataset will be filtered. ' + FILTER_DESCRIPTION + checkField(dataset_json.DatasetPrompt) + checkField(summary_ds.SystemPrompt) + "\n The DB content is as follows: " + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data;
|
|
552
|
-
if (dataset_json.hasGeneExpression) {
|
|
553
|
-
if (common_genes.length > 0) {
|
|
554
|
-
system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
|
|
555
|
-
}
|
|
556
|
-
}
|
|
557
|
-
system_prompt += " Question: {" + prompt + "} answer:";
|
|
558
|
-
const response = await route_to_appropriate_llm_provider(system_prompt, llm);
|
|
559
|
-
if (testing) {
|
|
560
|
-
return { action: "summary", response: JSON.parse(response) };
|
|
561
|
-
} else {
|
|
562
|
-
return validate_summary_response(response, common_genes, dataset_json, ds);
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
/**
 * Turn the raw LLM answer for a summary query into a plot state or an HTML message.
 *
 * @param {string} response - JSON text returned by the LLM.
 * @param {string[]} common_genes - gene names forwarded unchanged to validate_term.
 * @param {object} dataset_json - dataset configuration forwarded to validate_term.
 * @param {object} ds - dataset object forwarded to validate_term and validate_filter.
 * @returns {{type: "html", html: string} | {type: "plot", plot: object}}
 *   `html` on the first validation failure, otherwise the assembled plot state.
 * @throws whatever JSON.parse throws when `response` is not valid JSON.
 */
function validate_summary_response(response, common_genes, dataset_json, ds) {
  const parsed = JSON.parse(response);
  const plotState = { chartType: "summary" };
  // Any html text supplied by the LLM is kept as a prefix of every error message.
  const htmlPrefix = parsed.html ? parsed.html : "";
  const fail = (message) => ({ type: "html", html: htmlPrefix + message });
  const isNumeric = (category) => category == "float" || category == "integer";

  if (!parsed.term) return fail("term type is not present in summary output");

  const first = validate_term(parsed.term, common_genes, dataset_json, ds);
  if (first.html.length > 0) return fail(first.html);
  plotState.term = first.term_type;
  if (isNumeric(first.category)) plotState.term.q = { mode: "continuous" };
  // Categories are parked on the plot state only until the child type is resolved.
  plotState.category = first.category;

  if (parsed.term2) {
    const second = validate_term(parsed.term2, common_genes, dataset_json, ds);
    if (second.html.length > 0) return fail(second.html);
    plotState.term2 = second.term_type;
    if (isNumeric(second.category)) plotState.term2.q = { mode: "continuous" };
    plotState.category2 = second.category;
  }

  // Only a child type from the known list is passed on; anything else is ignored.
  const knownChildTypes = ["violin", "boxplot", "sampleScatter", "barchart"];
  const llmChildType = parsed.childType && knownChildTypes.includes(parsed.childType) ? parsed.childType : void 0;
  const resolved = resolveChildType(plotState.category, plotState.category2, llmChildType);
  if (resolved.error) return fail(resolved.error);

  plotState.childType = resolved.childType;
  if (plotState.childType == "barchart") {
    plotState.term.q = { mode: "discrete" };
    if (plotState.term2) plotState.term2.q = { mode: "discrete" };
  }
  if (resolved.bothNumeric && (resolved.childType == "violin" || resolved.childType == "boxplot")) {
    plotState.term2.q = { mode: "discrete" };
  }

  delete plotState.category;
  if (plotState.category2) delete plotState.category2;

  if (parsed.simpleFilter && parsed.simpleFilter.length > 0) {
    const checked = validate_filter(parsed.simpleFilter, ds, "");
    if (checked.html.length > 0) return fail(checked.html);
    plotState.filter = checked.simplefilter;
  }
  return { type: "plot", plot: plotState };
}
|
|
628
|
-
/**
 * Ask the LLM to extract matrix-plot terms, gene names and filters from a user prompt.
 *
 * @param {string} prompt - the user's question.
 * @param {*} llm - LLM configuration passed through to route_to_appropriate_llm_provider.
 * @param {{rag_docs: string[]}} dataset_db_output - its `rag_docs` are joined into the prompt.
 * @param {object} dataset_json - dataset configuration; `charts`, `DatasetPrompt` and
 *   `hasGeneExpression` are read here.
 * @param {string[]} genes_list - passed to extractGenesFromPrompt.
 * @param {object} ds - dataset object forwarded to validate_matrix_response.
 * @param {boolean} testing - when truthy, return the parsed LLM answer without validation.
 * @returns {Promise<object>} `{ action: "matrix", response }` in testing mode, otherwise the
 *   result of validate_matrix_response.
 * @throws {string} when the dataset has no "Matrix" chart entry or no training data.
 */
async function extract_matrix_search_terms_from_query(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
  const Schema = {
    $schema: "http://json-schema.org/draft-07/schema#",
    $ref: "#/definitions/MatrixType",
    definitions: {
      MatrixType: {
        type: "object",
        properties: {
          terms: {
            type: "array",
            items: { type: "string" },
            description: "Names of dictionary/clinical terms to include as rows in the matrix"
          },
          geneNames: {
            type: "array",
            items: { type: "string" },
            description: "Names of genes to include as gene variant rows in the matrix"
          },
          simpleFilter: {
            type: "array",
            items: { $ref: "#/definitions/FilterTerm" },
            description: "Optional simple filter terms to restrict the sample set"
          }
        },
        additionalProperties: false
      },
      ...FILTER_TERM_DEFINITIONS
    }
  };
  const common_genes = extractGenesFromPrompt(prompt, genes_list);
  // Only the first "Matrix" chart entry is used, so look it up directly.
  const matrix_ds = dataset_json.charts.find((chart) => chart.type == "Matrix");
  if (!matrix_ds) throw "Matrix information is not present in the dataset file.";
  // A missing TrainingData field is reported with the same message as an empty one.
  if (!matrix_ds.TrainingData || matrix_ds.TrainingData.length == 0) throw "No training data is provided for the matrix agent.";
  const training_data = formatTrainingExamples(matrix_ds.TrainingData);
  let system_prompt = "I am an assistant that extracts terms and gene names from the user query to create a matrix plot. A matrix plot displays multiple genes and/or clinical variables across samples in a grid layout. The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " + JSON.stringify(Schema) + ' The "terms" field should ONLY contain names of clinical/dictionary fields from the sqlite db. The "geneNames" field should ONLY contain gene names. At least one of "terms" or "geneNames" must be provided. The "simpleFilter" field is optional and should contain an array of JSON terms with which the dataset will be filtered. ' + FILTER_DESCRIPTION + checkField(dataset_json.DatasetPrompt) + checkField(matrix_ds.SystemPrompt) + "\n The DB content is as follows: " + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data;
  if (dataset_json.hasGeneExpression && common_genes.length > 0) {
    system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
  }
  system_prompt += " Question: {" + prompt + "} answer:";
  const response = await route_to_appropriate_llm_provider(system_prompt, llm);
  if (testing) {
    return { action: "matrix", response: JSON.parse(response) };
  }
  return validate_matrix_response(response, common_genes, dataset_json, ds);
}
|
|
674
|
-
/**
 * Turn the raw LLM answer for a matrix query into a plot state or an HTML message.
 *
 * All problems found are concatenated into one message rather than stopping at the first.
 *
 * @param {string} response - JSON text returned by the LLM.
 * @param {string[]} common_genes - gene names accepted for this query; each requested gene
 *   is lower-cased before being compared against this list.
 * @param {object} dataset_json - dataset configuration; `hasGeneExpression` selects the
 *   term type used for genes.
 * @param {object} ds - dataset object; `ds.cohort.termdb.q.termjsonByOneid` resolves term ids.
 * @returns {{type: "html", html: string} | {type: "plot", plot: object}}
 * @throws whatever JSON.parse throws when `response` is not valid JSON.
 */
function validate_matrix_response(response, common_genes, dataset_json, ds) {
  const response_type = JSON.parse(response);
  const pp_plot_json = { chartType: "matrix" };
  let html = "";
  if (response_type.html) html = response_type.html;

  // The LLM output is not schema-checked: anything that is not an array counts as "not
  // provided", so it can no longer slip past the check below and yield an empty matrix.
  const terms = Array.isArray(response_type.terms) ? response_type.terms : [];
  const geneNames = Array.isArray(response_type.geneNames) ? response_type.geneNames : [];
  if (terms.length == 0 && geneNames.length == 0) {
    html += "At least one clinical term or gene name is required for a matrix plot";
  }

  const twLst = [];
  for (const t of terms) {
    const term = ds.cohort.termdb.q.termjsonByOneid(t);
    if (!term) {
      html += "invalid term id:" + t + " ";
    } else {
      twLst.push({ id: term.id });
    }
  }

  for (const g of geneNames) {
    // Non-string entries are reported as invalid instead of crashing on toLowerCase().
    if (typeof g != "string" || !common_genes.includes(g.toLowerCase())) {
      html += "invalid gene name:" + g + " ";
      continue;
    }
    const geneName = g.toUpperCase();
    if (dataset_json.hasGeneExpression) {
      twLst.push({ term: { gene: geneName, type: "geneExpression" } });
    } else {
      twLst.push({ term: { gene: geneName, name: geneName, type: "geneVariant" } });
    }
  }

  if (response_type.simpleFilter && response_type.simpleFilter.length > 0) {
    const validated_filters = validate_filter(response_type.simpleFilter, ds, "");
    if (validated_filters.html.length > 0) {
      html += validated_filters.html;
    } else {
      pp_plot_json.filter = validated_filters.simplefilter;
    }
  }

  if (html.length > 0) {
    return { type: "html", html };
  }
  pp_plot_json.termgroups = [{ name: "", lst: twLst }];
  return { type: "plot", plot: pp_plot_json };
}
|
|
723
|
-
/**
 * Ask the LLM to extract overlay parameters for a pre-built scatter plot from a user prompt.
 *
 * @param {string} prompt - the user's question.
 * @param {*} llm - LLM configuration passed through to route_to_appropriate_llm_provider.
 * @param {{rag_docs: string[]}} dataset_db_output - its `rag_docs` are joined into the prompt.
 * @param {object} dataset_json - dataset configuration; `prebuiltPlots`, `charts`,
 *   `DatasetPrompt` and `hasGeneExpression` are read here.
 * @param {string[]} genes_list - passed to extractGenesFromPrompt.
 * @param {object} ds - dataset object forwarded to validate_samplescatter_response.
 * @param {boolean} testing - when truthy, return the parsed LLM answer without validation.
 * @returns {Promise<object>} an html message when the dataset has no pre-built plots,
 *   `{ action: "sampleScatter", response }` in testing mode, otherwise the result of
 *   validate_samplescatter_response.
 * @throws {string} when the dataset has no "sampleScatter" chart entry or no training data.
 */
async function extract_samplescatter_terms_from_query(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
  if (!dataset_json.prebuiltPlots || dataset_json.prebuiltPlots.length == 0) {
    return { type: "html", html: "No pre-built scatter plots (t-SNE/UMAP) are available for this dataset" };
  }
  const Schema = {
    $schema: "http://json-schema.org/draft-07/schema#",
    $ref: "#/definitions/SampleScatterType",
    definitions: {
      SampleScatterType: {
        type: "object",
        properties: {
          plotName: {
            type: "string",
            description: "Name of the pre-built scatter plot to display"
          },
          colorTW: {
            type: ["string", "null"],
            description: "Term name or gene name to overlay as color on the scatter plot. Set to null to remove the color overlay."
          },
          shapeTW: {
            type: ["string", "null"],
            description: "Term name or gene name to overlay as shape on the scatter plot. Set to null to remove the shape overlay."
          },
          term0: {
            type: ["string", "null"],
            description: "Term name to use for Z/Divide which splits the plot into panels. Set to null to remove the divide overlay."
          },
          simpleFilter: {
            type: "array",
            items: { $ref: "#/definitions/FilterTerm" },
            description: "Optional simple filter terms to restrict the sample set"
          }
        },
        required: ["plotName"],
        additionalProperties: false
      },
      ...FILTER_TERM_DEFINITIONS
    }
  };
  const common_genes = extractGenesFromPrompt(prompt, genes_list);
  const scatter_ds = dataset_json.charts.find((chart) => chart.type == "sampleScatter");
  if (!scatter_ds) throw "sampleScatter information is not present in the dataset file.";
  // A missing TrainingData field is reported with the same message as an empty one.
  if (!scatter_ds.TrainingData || scatter_ds.TrainingData.length == 0) throw "No training data is provided for the sampleScatter agent.";
  const training_data = formatTrainingExamples(scatter_ds.TrainingData);
  const plotNames = dataset_json.prebuiltPlots.map((p) => p.name).join(", ");
  let system_prompt = "I am an assistant that extracts overlay parameters for pre-built scatter plots (t-SNE/UMAP). The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " + JSON.stringify(Schema) + " The available pre-built plots are: " + plotNames + '. The "plotName" field must match one of these exactly. The "colorTW", "shapeTW", and "term0" fields should contain names of clinical fields from the sqlite db OR gene names. To remove an overlay, set the corresponding field to null explicitly. If the user does not mention a particular overlay, do NOT include that field in the output (omit it entirely). Only include "colorTW", "shapeTW", or "term0" if the user explicitly mentions coloring, shaping, or dividing. ' + FILTER_DESCRIPTION + checkField(dataset_json.DatasetPrompt) + checkField(scatter_ds.SystemPrompt) + "\n The DB content is as follows: " + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data;
  if (dataset_json.hasGeneExpression && common_genes.length > 0) {
    system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
  }
  system_prompt += " Question: {" + prompt + "} answer:";
  const response = await route_to_appropriate_llm_provider(system_prompt, llm);
  if (testing) {
    return { action: "sampleScatter", response: JSON.parse(response) };
  }
  return validate_samplescatter_response(response, common_genes, dataset_json, ds);
}
|
|
780
|
-
function validate_samplescatter_response(response, common_genes, dataset_json, ds) {
|
|
781
|
-
const response_type = JSON.parse(response);
|
|
782
|
-
let html = "";
|
|
783
|
-
if (response_type.html) html = response_type.html;
|
|
784
|
-
if (!response_type.plotName) {
|
|
785
|
-
html += "plotName is required for sample scatter output";
|
|
786
|
-
} else {
|
|
787
|
-
const matchedPlot = dataset_json.prebuiltPlots.find(
|
|
788
|
-
(p) => p.name.toLowerCase() == response_type.plotName.toLowerCase()
|
|
138
|
+
mayLog("ScaffoldResult: ", scaffoldResult);
|
|
139
|
+
if (plotType === "hiercluster" && "plot" in scaffoldResult && scaffoldResult.type === "plot" || "text" in scaffoldResult && scaffoldResult.type === "text") {
|
|
140
|
+
return scaffoldResult;
|
|
141
|
+
}
|
|
142
|
+
mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
|
|
143
|
+
if (!scaffoldResult)
|
|
144
|
+
throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
|
|
145
|
+
if ("type" in scaffoldResult && scaffoldResult.type === "text") {
|
|
146
|
+
return scaffoldResult;
|
|
147
|
+
}
|
|
148
|
+
const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;
|
|
149
|
+
mayLog("#################################################");
|
|
150
|
+
mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
|
|
151
|
+
mayLog("#################################################");
|
|
152
|
+
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
153
|
+
const phrase2entityResult = await phrase2entity(
|
|
154
|
+
scaffoldResult,
|
|
155
|
+
plotType,
|
|
156
|
+
llm,
|
|
157
|
+
genes_list,
|
|
158
|
+
dataset_json,
|
|
159
|
+
ds,
|
|
160
|
+
genome
|
|
789
161
|
);
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
}
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
} else {
|
|
826
|
-
pp_plot_json.filter = validated_filters.simplefilter;
|
|
827
|
-
}
|
|
828
|
-
}
|
|
829
|
-
if (html.length > 0) {
|
|
830
|
-
return { type: "html", html };
|
|
831
|
-
} else {
|
|
832
|
-
return { type: "plot", plot: pp_plot_json };
|
|
833
|
-
}
|
|
834
|
-
}
|
|
835
|
-
/**
 * Resolve a term name returned by the LLM to a term wrapper.
 *
 * The name is first looked up as a dictionary term id. If that fails it is compared,
 * lower-cased, against `common_genes` and treated as a gene-expression term.
 *
 * @param {string} response_term - term id or gene name from the LLM output.
 * @param {string[]} common_genes - accepted gene names (compared against the lower-cased input).
 * @param {object} dataset_json - dataset configuration; only `hasGeneExpression` is read.
 * @param {object} ds - dataset object; `ds.cohort.termdb.q.termjsonByOneid` resolves term ids.
 * @returns {{term_type: (object|undefined), html: string, category: string}}
 *   `html` is non-empty when the term could not be resolved; `category` is the dictionary
 *   term's `type`, or "float" for a gene-expression term.
 */
function validate_term(response_term, common_genes, dataset_json, ds) {
  let html = "";
  let term_type;
  let category = "";
  const term = ds.cohort.termdb.q.termjsonByOneid(response_term);
  if (term) {
    term_type = { id: term.id };
    category = term.type;
  } else if (typeof response_term != "string") {
    // The LLM output is not schema-checked; report a non-string term instead of
    // throwing on toLowerCase().
    html += "invalid term id:" + JSON.stringify(response_term);
  } else if (!common_genes.includes(response_term.toLowerCase())) {
    html += "invalid term id:" + response_term;
  } else if (dataset_json.hasGeneExpression) {
    term_type = { term: { gene: response_term.toUpperCase(), type: "geneExpression" } };
    category = "float";
  } else {
    html += "Dataset does not support gene expression";
  }
  return { term_type, html, category };
}
|
|
858
|
-
/**
 * Count the non-overlapping occurrences of `word` in `str`, scanning left to right.
 *
 * @param {string} str - text to search.
 * @param {string} word - substring to count; an empty string counts as zero occurrences.
 * @returns {number} number of non-overlapping matches.
 */
function countOccurrences(str, word) {
  // Splitting on the word leaves one more piece than there are separators.
  return word === "" ? 0 : str.split(word).length - 1;
}
|
|
868
|
-
/**
 * Remove the last occurrence of `word` from `str`, unless it is the only occurrence.
 *
 * Occurrences are counted without overlap, left to right. When the word is absent or
 * appears exactly once, the input is returned unchanged.
 *
 * @param {string} str - text to edit.
 * @param {string} word - substring to remove.
 * @returns {string} the text with the final occurrence removed, or the original text.
 */
function removeLastOccurrence(str, word) {
  const lastAt = str.lastIndexOf(word);
  if (lastAt === -1) return str;
  // Non-overlapping count; an empty word counts as zero occurrences.
  const total = word === "" ? 0 : str.split(word).length - 1;
  return total === 1 ? str : str.slice(0, lastAt) + str.slice(lastAt + word.length);
}
|
|
878
|
-
/**
 * Merge filter entries that repeat the same categorical term into a single entry.
 *
 * The merged entry takes the position of the term's first occurrence and carries every
 * requested category in input order. Repeated non-categorical terms are left untouched.
 * A repeated term that cannot be resolved adds an "invalid filter id" message and its
 * entries are also left untouched.
 *
 * @param {Array<object>} filters - filter entries; `term` and `category` are read.
 * @param {object} ds - dataset object; `ds.cohort.termdb.q.termjsonByOneid` resolves term ids.
 * @returns {{filters: Array<object>, html: string}} the (possibly merged) filters and any
 *   error text. When no term repeats, the input array itself is returned.
 */
function sortSameCategoricalFilterKeys(filters, ds) {
  let html = "";
  const seen = /* @__PURE__ */ new Set();
  const repeatedKeys = /* @__PURE__ */ new Set();
  for (const item of filters) {
    if (seen.has(item.term)) repeatedKeys.add(item.term);
    else seen.add(item.term);
  }
  if (repeatedKeys.size == 0) return { filters, html };

  // term id -> categories collected for that repeated categorical term.
  const mergedCategories = /* @__PURE__ */ new Map();
  for (const key of repeatedKeys) {
    const term = ds.cohort.termdb.q.termjsonByOneid(key);
    if (!term) {
      html += "invalid filter id:" + key;
    } else if (term.type == "categorical") {
      mergedCategories.set(key, []);
    }
  }

  const sorted_filter = [];
  const emitted = /* @__PURE__ */ new Set();
  for (const f of filters) {
    const categories = mergedCategories.get(f.term);
    if (!categories) {
      sorted_filter.push(f);
      continue;
    }
    if (!emitted.has(f.term)) {
      emitted.add(f.term);
      // The array is shared, so categories appended below show up in this entry.
      sorted_filter.push({ term: f.term, category: categories });
    }
    // An entry may already list several categories; flatten so the merged list
    // never contains nested arrays.
    if (Array.isArray(f.category)) categories.push(...f.category);
    else categories.push(f.category);
  }
  return { filters: sorted_filter, html };
}
|
|
919
|
-
/**
 * Validate the LLM's simple filter list and build a (possibly nested) filter object.
 *
 * Up to two filter terms become one flat filter. More terms are chained pairwise: the
 * result of the previous pair is combined with the next term. Lists longer than
 * `num_filter_cutoff` are rejected with a message.
 *
 * @param {Array<object>} filters - filter entries from the LLM output.
 * @param {object} ds - dataset object forwarded to the helper functions.
 * @param {string} group_name - group label used in the "too many filters" message; may be "".
 * @returns {{simplefilter: (object|undefined), html: string}} `html` is non-empty when any
 *   problem was found; `simplefilter` is undefined when the list exceeds the cutoff.
 * @throws {string} when `filters` is not an array.
 */
function validate_filter(filters, ds, group_name) {
  if (!Array.isArray(filters)) throw "filter is not array";
  const sorted_filters = sortSameCategoricalFilterKeys(filters, ds);
  const terms = sorted_filters.filters;
  // Every message is accumulated here, so errors from the de-duplication step and from
  // earlier chaining iterations are not lost.
  let html = sorted_filters.html;
  let simplefilter;

  if (terms.length <= 2) {
    const generated = generate_filter_term(terms, ds);
    simplefilter = generated.simplefilter;
    html += generated.html;
  } else if (terms.length > num_filter_cutoff) {
    html += "For now, the maximum number of filter terms supported through the chatbot is " + num_filter_cutoff;
    if (group_name.length > 0) {
      html += " . The number of filter terms for group " + group_name + " is " + terms.length + "\n";
    } else {
      html += "The number of filter terms for this query is " + terms.length;
    }
  } else {
    // Fold left: ((t0, t1), t2), t3 ...
    let nested = terms[0];
    for (let i = 1; i < terms.length; i++) {
      const generated = generate_filter_term([nested, terms[i]], ds);
      nested = generated.simplefilter;
      html += generated.html;
    }
    simplefilter = nested;
  }
  return { simplefilter, html };
}
|
|
950
|
-
/**
 * Build one "tvslst" filter object from a short list of filter entries.
 *
 * Entries that are already "tvslst" objects are nested as-is. Other entries are resolved
 * to a dictionary term and converted to a "tvs" item: categorical terms get `values`,
 * float/integer terms get a single range.
 *
 * @param {Array<object>} filters - filter entries; `term`, `category`, `start`, `stop`,
 *   `join` and `type` are read.
 * @param {object} ds - dataset object; `ds.cohort.termdb.q.termjsonByOneid` resolves term ids.
 * @returns {{simplefilter: object, html: string}} the filter and any error text. Callers
 *   are expected to discard the filter when `html` is non-empty.
 */
function generate_filter_term(filters, ds) {
  let invalid_html = "";
  const localfilter = { type: "tvslst", in: true, lst: [] };
  // A bound is present unless it is undefined, null or an empty string. A plain
  // truthiness test would wrongly drop a legitimate bound of 0.
  const hasBound = (v) => v !== void 0 && v !== null && v !== "";

  for (const f of filters) {
    if (f.type == "tvslst") {
      localfilter.lst.push(f);
      continue;
    }
    const term = ds.cohort.termdb.q.termjsonByOneid(f.term);
    if (!term) {
      invalid_html += "invalid filter id:" + f.term;
      continue;
    }
    if (f.join) {
      localfilter.join = f.join;
    }

    if (term.type == "categorical") {
      if (Array.isArray(f.category)) {
        const categories = [];
        for (const category of f.category) {
          const cat = findCategoryKey(term.values, category);
          // Compare against undefined so an empty-string category key stays valid.
          if (cat === void 0) invalid_html += "invalid category from " + JSON.stringify(f);
          else categories.push({ key: cat });
        }
        localfilter.lst.push({ type: "tvs", tvs: { term, values: categories } });
      } else {
        const cat = findCategoryKey(term.values, f.category);
        if (cat === void 0) invalid_html += "invalid category from " + JSON.stringify(f);
        else localfilter.lst.push({ type: "tvs", tvs: { term, values: [{ key: cat }] } });
      }
    } else if (term.type == "float" || term.type == "integer") {
      const hasStart = hasBound(f.start);
      const hasStop = hasBound(f.stop);
      if (!hasStart && !hasStop) {
        // Nothing to filter on; report it and do not emit an empty range.
        invalid_html += "Neither greater or lesser defined";
        continue;
      }
      const range = {};
      if (hasStart) range.start = Number(f.start);
      else range.startunbounded = true;
      if (hasStop) range.stop = Number(f.stop);
      else range.stopunbounded = true;
      if (Number.isNaN(range.start) || Number.isNaN(range.stop)) {
        invalid_html += "invalid numeric range from " + JSON.stringify(f);
        continue;
      }
      localfilter.lst.push({ type: "tvs", tvs: { term, ranges: [range] } });
    }
  }

  // Two or more entries need a join; default to "and" when none was supplied.
  if (filters.length > 1 && !localfilter.join) {
    localfilter.join = "and";
  }
  return { simplefilter: localfilter, html: invalid_html };
}
|
|
1026
|
-
/**
 * Find the key of a category, matching either the key itself or the entry's `label`.
 *
 * @param {Object<string, {label?: string}>} termValues - map of category key to category entry.
 * @param {string} category - key or label to look for.
 * @returns {string|undefined} the matching key, or undefined when nothing matches.
 */
function findCategoryKey(termValues, category) {
  // Without this guard an undefined category would "match" the first entry that
  // simply has no label.
  if (!termValues || category === void 0 || category === null) return void 0;
  // Own properties only; entries that are null/undefined are skipped rather than
  // dereferenced.
  for (const [key, entry] of Object.entries(termValues)) {
    if (key === category || (entry && entry.label === category)) return key;
  }
  return void 0;
}
|
|
1032
|
-
/**
 * Read every gene name from the `codingGenes` table and return them lower-cased.
 *
 * @param {string} genedb - argument handed to the `Database` constructor.
 * @returns {Promise<string[]>} lower-cased gene names in query order.
 * @throws {string} "Could not parse geneDB" followed by the underlying error when the
 *   query or the conversion fails.
 */
async function parse_geneset_db(genedb) {
  const db = new Database(genedb);
  try {
    const rows = db.prepare("SELECT name from codingGenes").all();
    return rows.map((row) => row.name).map((name) => name.toLowerCase());
  } catch (error) {
    throw "Could not parse geneDB" + error;
  } finally {
    // Runs on both the success and the failure path.
    db.close();
  }
}
|
|
1048
|
-
/**
 * Load term definitions from the dataset database and prepare them for the LLM prompt.
 *
 * Only terms that have a row in `termhtmldef` are returned. Each kept term is returned
 * both as a structured row and as the text produced by parse_db_rows.
 *
 * @param {string} dataset_db - argument handed to the `Database` constructor.
 * @returns {Promise<{db_rows: Array<object>, rag_docs: Array<string>}>} parallel arrays:
 *   `db_rows[i]` is `{ name, description, values, term_type }` and `rag_docs[i]` is its
 *   text form.
 * @throws {string} "Error in parsing dataset DB:" followed by the underlying error.
 */
async function parse_dataset_db(dataset_db) {
  const db = new Database(dataset_db);
  const rag_docs = [];
  const db_rows = [];
  try {
    // term id -> description, so each term row needs one lookup instead of scanning
    // the whole description list twice.
    const descriptionById = /* @__PURE__ */ new Map();
    for (const row of db.prepare("SELECT * from termhtmldef").all()) {
      const jsonhtml = JSON.parse(row.jsonhtml);
      const description = jsonhtml.description[0].value;
      // Keep the first description when an id appears more than once.
      if (!descriptionById.has(row.id)) descriptionById.set(row.id, description);
    }

    for (const row of db.prepare("SELECT * from terms").all()) {
      if (!descriptionById.has(row.id)) continue;
      const jsondata = JSON.parse(row.jsondata);
      const values = jsondata.values
        ? Object.keys(jsondata.values).map((key) => ({ key, value: jsondata.values[key] }))
        : [];
      const db_row = {
        name: row.id,
        description: descriptionById.get(row.id),
        values,
        term_type: row.type
      };
      rag_docs.push(parse_db_rows(db_row));
      db_rows.push(db_row);
    }
  } catch (error) {
    throw "Error in parsing dataset DB:" + error;
  } finally {
    db.close();
  }
  return { db_rows, rag_docs };
}
|
|
1094
|
-
function parse_db_rows(db_row) {
|
|
1095
|
-
let output_string = 'Name of the field is:"' + db_row.name + '". This field is of the type:' + db_row.term_type + ". Description: " + db_row.description;
|
|
1096
|
-
if (db_row.values.length > 0) {
|
|
1097
|
-
output_string += "This field contains the following possible values.";
|
|
1098
|
-
for (const value of db_row.values) {
|
|
1099
|
-
if (value.value && value.value.label) {
|
|
1100
|
-
output_string += 'The key is "' + value.key + '" and the label is "' + value.value.label + '".';
|
|
1101
|
-
}
|
|
1102
|
-
}
|
|
162
|
+
mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
|
|
163
|
+
if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
|
|
164
|
+
return phrase2entityResult;
|
|
165
|
+
}
|
|
166
|
+
mayLog(phrase2entityResult);
|
|
167
|
+
mayLog("#################################################");
|
|
168
|
+
mayLog("####### Third phase: From Entities infer Term Objects #######");
|
|
169
|
+
mayLog("#################################################");
|
|
170
|
+
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
171
|
+
const termObj = await inferTermObjFromEntity(
|
|
172
|
+
phrase2entityResult,
|
|
173
|
+
plotType,
|
|
174
|
+
llm,
|
|
175
|
+
dataset_db,
|
|
176
|
+
genes_list,
|
|
177
|
+
genome
|
|
178
|
+
);
|
|
179
|
+
mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
|
|
180
|
+
mayLog("Inferred termObj from entity:", JSON.stringify(termObj));
|
|
181
|
+
mayLog("#################################################");
|
|
182
|
+
mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
|
|
183
|
+
mayLog("#################################################");
|
|
184
|
+
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
185
|
+
const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db, genome);
|
|
186
|
+
mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
|
|
187
|
+
if ("type" in twTvsObj && twTvsObj.type === "text") {
|
|
188
|
+
return twTvsObj;
|
|
189
|
+
}
|
|
190
|
+
mayLog("twTvsObj:", twTvsObj);
|
|
191
|
+
mayLog("#################################################");
|
|
192
|
+
mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
|
|
193
|
+
mayLog("#################################################");
|
|
194
|
+
time = (/* @__PURE__ */ new Date()).valueOf();
|
|
195
|
+
ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
|
|
196
|
+
mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
|
|
1103
197
|
}
|
|
1104
|
-
return
|
|
198
|
+
return ai_output_json;
|
|
1105
199
|
}
|
|
1106
200
|
export {
|
|
1107
|
-
|
|
1108
|
-
readJSONFile,
|
|
201
|
+
init,
|
|
1109
202
|
run_chat_pipeline
|
|
1110
203
|
};
|