@sjcrh/proteinpaint-server 2.174.1 → 2.176.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/routes/termdb.boxplot.js +4 -1
- package/routes/termdb.categories.js +2 -1
- package/routes/termdb.chat.js +451 -186
- package/routes/termdb.cluster.js +1 -0
- package/routes/termdb.config.js +6 -2
- package/routes/termdb.descrstats.js +8 -2
- package/routes/termdb.runChart.js +50 -35
- package/routes/termdb.sampleScatter.js +4 -2
- package/routes/termdb.violin.js +2 -1
- package/routes/types.js +0 -0
- package/src/app.js +845 -363
- package/src/mds3.gdc.filter.js +1 -1
package/routes/termdb.chat.js
CHANGED
|
@@ -7,6 +7,81 @@ import { mayLog } from "#src/helpers.ts";
|
|
|
7
7
|
import Database from "better-sqlite3";
|
|
8
8
|
import { formatElapsedTime } from "#shared";
|
|
9
9
|
const num_filter_cutoff = 3;
|
|
10
|
+
/**
 * JSON-schema definitions describing a single filter term.
 *
 * The object is spread into the `definitions` section of the per-agent
 * schemas (DE, summary, matrix), so every schema ends up referencing the
 * same nested objects. The whole tree is therefore deep-frozen: an
 * accidental mutation through one schema can no longer leak into the others.
 *
 * Key order is significant because the schemas are serialized with
 * JSON.stringify into the LLM prompt; it is kept exactly as before.
 */
const FILTER_TERM_DEFINITIONS = (() => {
  // Both term kinds accept the same optional "join" connector.
  const buildJoinProperty = () => ({
    type: "string",
    enum: ["and", "or"],
    description: "join term to be used only when there is more than one filter term and should be placed from the 2nd filter term onwards describing how it connects to the previous term"
  });

  // Recursively freeze plain objects and arrays.
  const deepFreeze = (node) => {
    if (node !== null && typeof node === "object" && !Object.isFrozen(node)) {
      Object.freeze(node);
      Object.values(node).forEach(deepFreeze);
    }
    return node;
  };

  return deepFreeze({
    FilterTerm: {
      anyOf: [{ $ref: "#/definitions/CategoricalFilterTerm" }, { $ref: "#/definitions/NumericFilterTerm" }]
    },
    CategoricalFilterTerm: {
      type: "object",
      properties: {
        term: { type: "string", description: "Name of categorical term" },
        category: { type: "string", description: "The category of the term" },
        join: buildJoinProperty()
      },
      required: ["term", "category"],
      additionalProperties: false
    },
    NumericFilterTerm: {
      type: "object",
      properties: {
        term: { type: "string", description: "Name of numeric term" },
        start: { type: "number", description: "start position (or lower limit) of numeric term" },
        stop: { type: "number", description: "stop position (or upper limit) of numeric term" },
        join: buildJoinProperty()
      },
      required: ["term"],
      additionalProperties: false
    }
  });
})();
|
|
44
|
+
function formatTrainingExamples(trainingData) {
|
|
45
|
+
return trainingData.map(
|
|
46
|
+
(td, i) => "Example question" + (i + 1).toString() + ": " + td.question + " Example answer" + (i + 1).toString() + ":" + JSON.stringify(td.answer)
|
|
47
|
+
).join(" ");
|
|
48
|
+
}
|
|
49
|
+
const FILTER_DESCRIPTION = 'There are two kinds of filter variables: "Categorical" and "Numeric". "Categorical" variables are those variables which can have a fixed set of values e.g. gender, race. They are defined by the "CategoricalFilterTerm" which consists of "term" (a field from the sqlite3 db) and "category" (a value of the field from the sqlite db). "Numeric" variables are those which can have any numeric value. They are defined by "NumericFilterTerm" and contain the subfields "term" (a field from the sqlite3 db), "start" an optional filter which is defined when a lower cutoff is defined in the user input for the numeric variable and "stop" an optional filter which is defined when a higher cutoff is defined in the user input for the numeric variable. ';
|
|
50
|
+
/**
 * Find the words of a user prompt that are known gene names.
 *
 * The prompt is stripped of every character that is not a letter, digit or
 * whitespace, split on whitespace and lower-cased. Because tokens are
 * lower-cased before the exact-match lookup, an entry of `genes_list` that
 * contains upper-case letters can never match.
 *
 * @param {string} prompt - raw user prompt
 * @param {string[]} genes_list - known gene names
 * @returns {string[]} matching lower-cased tokens, in prompt order (repeats kept)
 */
function extractGenesFromPrompt(prompt, genes_list) {
  if (typeof prompt !== "string" || !Array.isArray(genes_list) || genes_list.length === 0) {
    return [];
  }
  // The gene list can be genome-sized; build the lookup once instead of
  // scanning the array for every word of the prompt.
  const knownGenes = new Set(genes_list);
  return prompt
    .replace(/[^a-zA-Z0-9\s]/g, "")
    .split(/\s+/)
    .map((word) => word.toLowerCase())
    .filter((word) => knownGenes.has(word));
}
|
|
54
|
+
// Default plot (child) type for each "<term category>:<term2 category>" pair.
// "undefined" stands for an absent term; float/integer are folded into
// "numeric" by resolveChildType before the lookup.
const CHILD_TYPE_DEFAULTS = Object.freeze({
  "categorical:undefined": "barchart",
  "numeric:undefined": "violin",
  "categorical:categorical": "barchart",
  "numeric:categorical": "violin",
  "categorical:numeric": "violin",
  "numeric:numeric": "sampleScatter"
});

// Plot types that are rejected for a given category pair. Pairs without an
// entry accept any requested plot type.
const CHILD_TYPE_INVALID = Object.freeze({
  "categorical:undefined": new Set(["violin", "boxplot", "sampleScatter"]),
  "categorical:categorical": new Set(["violin", "boxplot", "sampleScatter"])
});

/**
 * Decide which plot type to use for a pair of term categories.
 *
 * @param {string|undefined} cat1 - category of the first term ("categorical", "float", "integer", ...)
 * @param {string|undefined} cat2 - category of the optional second term
 * @param {string|undefined} llmChildType - plot type explicitly requested, if any
 * @returns {{childType: string, bothNumeric?: boolean} | {error: string}}
 *   - `{ childType: "barchart" }` when the category pair is not in the defaults table
 *     (the requested type is ignored in that case);
 *   - `{ error }` when the requested type is listed as invalid for the pair;
 *   - otherwise `{ childType, bothNumeric }` using the requested type or the default.
 */
function resolveChildType(cat1, cat2, llmChildType) {
  const normalize = (category) =>
    category === "float" || category === "integer" ? "numeric" : category || "undefined";

  const first = normalize(cat1);
  const second = normalize(cat2);
  const key = first + ":" + second;

  const defaultType = CHILD_TYPE_DEFAULTS[key];
  if (!defaultType) {
    return { childType: "barchart" };
  }

  const rejected = CHILD_TYPE_INVALID[key];
  if (llmChildType && rejected && rejected.has(llmChildType)) {
    // Do not leak the internal "undefined" placeholder into the message shown
    // to the user when only one term was supplied.
    const subject = second === "undefined" ? "a " + first + " variable" : first + " and " + second + " variables";
    return {
      error: "Invalid plot type supplied by the user: " + llmChildType + ". For " + subject + " the plot type should always be " + defaultType
    };
  }

  return {
    childType: llmChildType || defaultType,
    bothNumeric: first === "numeric" && second === "numeric"
  };
}
|
|
10
85
|
const api = {
|
|
11
86
|
endpoint: "termdb/chat",
|
|
12
87
|
methods: {
|
|
@@ -32,72 +107,25 @@ function init({ genomes }) {
|
|
|
32
107
|
if (!serverconfig_ds_entries.aifiles) {
|
|
33
108
|
throw "aifiles are missing for chatbot to work";
|
|
34
109
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
if (
|
|
38
|
-
|
|
39
|
-
comp_model_name = serverconfig.sj_comp_model_name;
|
|
40
|
-
} else if (serverconfig.llm_backend == "ollama") {
|
|
41
|
-
apilink = serverconfig.ollama_apilink;
|
|
42
|
-
comp_model_name = serverconfig.ollama_comp_model_name;
|
|
43
|
-
} else {
|
|
44
|
-
throw "llm_backend either needs to be 'SJ' or 'ollama'";
|
|
110
|
+
const llm = serverconfig.llm;
|
|
111
|
+
if (!llm) throw "serverconfig.llm is not configured";
|
|
112
|
+
if (llm.provider !== "SJ" && llm.provider !== "ollama") {
|
|
113
|
+
throw "llm.provider must be 'SJ' or 'ollama'";
|
|
45
114
|
}
|
|
46
115
|
const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
|
|
47
116
|
const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
|
|
48
117
|
const dataset_json = await readJSONFile(serverconfig_ds_entries.aifiles);
|
|
49
|
-
const
|
|
50
|
-
const
|
|
118
|
+
const testing = false;
|
|
119
|
+
const ai_output_json = await run_chat_pipeline(
|
|
51
120
|
q.prompt,
|
|
52
|
-
|
|
53
|
-
serverconfig.llm_backend,
|
|
54
|
-
apilink,
|
|
121
|
+
llm,
|
|
55
122
|
serverconfig.aiRoute,
|
|
56
|
-
dataset_json
|
|
123
|
+
dataset_json,
|
|
124
|
+
testing,
|
|
125
|
+
dataset_db,
|
|
126
|
+
genedb,
|
|
127
|
+
ds
|
|
57
128
|
);
|
|
58
|
-
let ai_output_json;
|
|
59
|
-
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
60
|
-
if (class_response.type == "html") {
|
|
61
|
-
ai_output_json = class_response;
|
|
62
|
-
} else if (class_response.type == "plot") {
|
|
63
|
-
const classResult = class_response.plot;
|
|
64
|
-
mayLog("classResult:", classResult);
|
|
65
|
-
if (classResult == "summary") {
|
|
66
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
67
|
-
ai_output_json = await extract_summary_terms(
|
|
68
|
-
q.prompt,
|
|
69
|
-
serverconfig.llm_backend,
|
|
70
|
-
comp_model_name,
|
|
71
|
-
apilink,
|
|
72
|
-
dataset_db,
|
|
73
|
-
dataset_json,
|
|
74
|
-
genedb,
|
|
75
|
-
ds
|
|
76
|
-
);
|
|
77
|
-
mayLog("Time taken for summary agent:", formatElapsedTime(Date.now() - time12));
|
|
78
|
-
} else if (classResult == "dge") {
|
|
79
|
-
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
80
|
-
ai_output_json = await extract_DE_search_terms_from_query(
|
|
81
|
-
q.prompt,
|
|
82
|
-
serverconfig.llm_backend,
|
|
83
|
-
comp_model_name,
|
|
84
|
-
apilink,
|
|
85
|
-
dataset_db,
|
|
86
|
-
dataset_json,
|
|
87
|
-
ds
|
|
88
|
-
);
|
|
89
|
-
mayLog("Time taken for DE agent:", formatElapsedTime(Date.now() - time12));
|
|
90
|
-
} else if (classResult == "survival") {
|
|
91
|
-
ai_output_json = { type: "html", html: "survival agent has not been implemented yet" };
|
|
92
|
-
} else {
|
|
93
|
-
ai_output_json = { type: "html", html: "Unknown classification value" };
|
|
94
|
-
}
|
|
95
|
-
} else {
|
|
96
|
-
ai_output_json = {
|
|
97
|
-
type: "html",
|
|
98
|
-
html: "Unknown classification type"
|
|
99
|
-
};
|
|
100
|
-
}
|
|
101
129
|
res.send(ai_output_json);
|
|
102
130
|
} catch (e) {
|
|
103
131
|
if (e.stack) mayLog(e.stack);
|
|
@@ -105,6 +133,84 @@ function init({ genomes }) {
|
|
|
105
133
|
}
|
|
106
134
|
};
|
|
107
135
|
}
|
|
136
|
+
/**
 * Run the chat pipeline for one user prompt: classify the prompt, then hand
 * it to the agent that matches the classification.
 *
 * @param {string} user_prompt - prompt text entered by the user
 * @param {object} llm - LLM settings, passed through to the classifier and agents
 * @param {string} aiRoute - passed through to classify_query_by_dataset_type
 * @param {object} dataset_json - dataset AI configuration; `hasGeneExpression` is read here
 * @param {boolean} testing - passed through to the classifier and agents
 * @param {string} dataset_db - passed to parse_dataset_db
 * @param {string} genedb - passed to parse_geneset_db
 * @param {object} ds - dataset object, passed through to the agents
 * @returns {Promise<object>} the classifier's own reply when it is of type "html",
 *   an `{ type: "html", html }` notice for unsupported/unknown classifications,
 *   or whatever the selected agent returns
 */
async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testing, dataset_db, genedb, ds) {
  const classifyStart = Date.now();
  // NOTE(review): with testing=true classify_query_by_dataset_type returns
  // { action, response } which has no `type`, so this function then answers
  // "Unknown classification type". Confirm whether the pipeline is meant to be
  // run in testing mode at all.
  const class_response = await classify_query_by_dataset_type(user_prompt, llm, aiRoute, dataset_json, testing);
  mayLog("Time taken for classification:", formatElapsedTime(Date.now() - classifyStart));

  if (class_response.type === "html") {
    return class_response;
  }
  if (class_response.type !== "plot") {
    return { type: "html", html: "Unknown classification type" };
  }

  const classResult = class_response.plot;
  mayLog("classResult:", classResult);

  if (classResult === "survival") {
    return { type: "html", html: "survival agent has not been implemented yet" };
  }

  // A Map (not a plain object) so that a classification such as "constructor"
  // coming back from the LLM cannot resolve to an inherited property.
  const agents = new Map([
    [
      "summary",
      {
        label: "summary",
        usesGenes: true,
        run: (db, genes) => extract_summary_terms(user_prompt, llm, db, dataset_json, genes, ds, testing)
      }
    ],
    [
      "dge",
      {
        label: "DE",
        usesGenes: false,
        run: (db) => extract_DE_search_terms_from_query(user_prompt, llm, db, dataset_json, ds, testing)
      }
    ],
    [
      "matrix",
      {
        label: "matrix",
        usesGenes: true,
        run: (db, genes) => extract_matrix_search_terms_from_query(user_prompt, llm, db, dataset_json, genes, ds, testing)
      }
    ],
    [
      "sampleScatter",
      {
        label: "sampleScatter",
        usesGenes: true,
        run: (db, genes) => extract_samplescatter_terms_from_query(user_prompt, llm, db, dataset_json, genes, ds, testing)
      }
    ]
  ]);

  const agent = agents.get(classResult);
  if (!agent) {
    return { type: "html", html: "Unknown classification value" };
  }

  // The databases are only parsed once an agent that needs them is selected,
  // so unsupported classifications no longer pay for (or fail on) DB parsing.
  const dataset_db_output = await parse_dataset_db(dataset_db);
  const genes_list = agent.usesGenes && dataset_json.hasGeneExpression ? await parse_geneset_db(genedb) : [];

  const agentStart = Date.now();
  const ai_output_json = await agent.run(dataset_db_output, genes_list);
  mayLog("Time taken for " + agent.label + " agent:", formatElapsedTime(Date.now() - agentStart));
  return ai_output_json;
}
|
|
108
214
|
async function call_ollama(prompt, model_name, apilink) {
|
|
109
215
|
const temperature = 0.01;
|
|
110
216
|
const top_p = 0.95;
|
|
@@ -177,6 +283,17 @@ async function call_sj_llm(prompt, model_name, apilink) {
|
|
|
177
283
|
throw "SJ API request failed:" + error;
|
|
178
284
|
}
|
|
179
285
|
}
|
|
286
|
+
/**
 * Send a prompt to the LLM backend selected by `llm.provider`.
 *
 * @param {string} template - complete prompt text to send
 * @param {{provider: string, modelName: string, api: string}} llm - backend settings;
 *   `modelName` and `api` are forwarded to the backend call unchanged
 * @returns {Promise<*>} whatever call_sj_llm / call_ollama resolves to
 * @throws {string} when `llm` is missing or its provider is neither "SJ" nor "ollama"
 *   (a string is thrown to match the error style used by the rest of this file)
 */
async function route_to_appropriate_llm_provider(template, llm) {
  const provider = llm ? llm.provider : undefined;
  switch (provider) {
    case "SJ":
      return await call_sj_llm(template, llm.modelName, llm.api);
    case "ollama":
      return await call_ollama(template, llm.modelName, llm.api);
    default:
      // Name the offending value so a misconfiguration is obvious from the log.
      throw "Unknown LLM provider: " + String(provider);
  }
}
|
|
180
297
|
function checkField(sentence) {
|
|
181
298
|
if (!sentence) return "";
|
|
182
299
|
else return sentence;
|
|
@@ -185,7 +302,7 @@ async function readJSONFile(file) {
|
|
|
185
302
|
const json_file = await fs.promises.readFile(file);
|
|
186
303
|
return JSON.parse(json_file.toString());
|
|
187
304
|
}
|
|
188
|
-
async function classify_query_by_dataset_type(user_prompt,
|
|
305
|
+
async function classify_query_by_dataset_type(user_prompt, llm, aiRoute, dataset_json, testing) {
|
|
189
306
|
const data = await readJSONFile(aiRoute);
|
|
190
307
|
let contents = data["general"];
|
|
191
308
|
for (const key of Object.keys(data)) {
|
|
@@ -193,32 +310,24 @@ async function classify_query_by_dataset_type(user_prompt, comp_model_name, llm_
|
|
|
193
310
|
contents += data[key];
|
|
194
311
|
}
|
|
195
312
|
}
|
|
196
|
-
const classification_ds = dataset_json.charts.
|
|
197
|
-
if (classification_ds
|
|
198
|
-
if (classification_ds
|
|
199
|
-
let train_iter = 0;
|
|
313
|
+
const classification_ds = dataset_json.charts.find((chart) => chart.type == "Classification");
|
|
314
|
+
if (!classification_ds) throw "Classification information is not present in the dataset file.";
|
|
315
|
+
if (classification_ds.TrainingData.length == 0) throw "No training data is provided for the classification agent.";
|
|
200
316
|
let training_data = "";
|
|
201
|
-
if (classification_ds
|
|
202
|
-
contents += checkField(dataset_json.DatasetPrompt) + checkField(classification_ds
|
|
203
|
-
|
|
204
|
-
train_iter += 1;
|
|
205
|
-
training_data += "Example question" + train_iter.toString() + ": " + train_data.question + " Example answer" + train_iter.toString() + ":" + JSON.stringify(train_data.answer) + " ";
|
|
206
|
-
}
|
|
317
|
+
if (classification_ds && classification_ds.TrainingData.length > 0) {
|
|
318
|
+
contents += checkField(dataset_json.DatasetPrompt) + checkField(classification_ds.SystemPrompt);
|
|
319
|
+
training_data = formatTrainingExamples(classification_ds.TrainingData);
|
|
207
320
|
}
|
|
208
321
|
const template = contents + " training data is as follows:" + training_data + " Question: {" + user_prompt + "} Answer: {answer}";
|
|
209
|
-
|
|
210
|
-
if (
|
|
211
|
-
|
|
212
|
-
} else if (llm_backend_type == "ollama") {
|
|
213
|
-
response = await call_ollama(template, comp_model_name, apilink);
|
|
322
|
+
const response = await route_to_appropriate_llm_provider(template, llm);
|
|
323
|
+
if (testing) {
|
|
324
|
+
return { action: "html", response: JSON.parse(response) };
|
|
214
325
|
} else {
|
|
215
|
-
|
|
326
|
+
return JSON.parse(response);
|
|
216
327
|
}
|
|
217
|
-
return JSON.parse(response);
|
|
218
328
|
}
|
|
219
|
-
async function extract_DE_search_terms_from_query(prompt,
|
|
329
|
+
async function extract_DE_search_terms_from_query(prompt, llm, dataset_db_output, dataset_json, ds, testing) {
|
|
220
330
|
if (dataset_json.hasDE) {
|
|
221
|
-
const dataset_db_output = await parse_dataset_db(dataset_db);
|
|
222
331
|
const Schema = {
|
|
223
332
|
$schema: "http://json-schema.org/draft-07/schema#",
|
|
224
333
|
$ref: "#/definitions/DEType",
|
|
@@ -245,60 +354,20 @@ async function extract_DE_search_terms_from_query(prompt, llm_backend_type, comp
|
|
|
245
354
|
required: ["group1", "group2"],
|
|
246
355
|
additionalProperties: false
|
|
247
356
|
},
|
|
248
|
-
|
|
249
|
-
anyOf: [{ $ref: "#/definitions/CategoricalFilterTerm" }, { $ref: "#/definitions/NumericFilterTerm" }]
|
|
250
|
-
},
|
|
251
|
-
CategoricalFilterTerm: {
|
|
252
|
-
type: "object",
|
|
253
|
-
properties: {
|
|
254
|
-
term: { type: "string", description: "Name of categorical term" },
|
|
255
|
-
category: { type: "string", description: "The category of the term" },
|
|
256
|
-
join: {
|
|
257
|
-
type: "string",
|
|
258
|
-
enum: ["and", "or"],
|
|
259
|
-
description: "join term to be used only when there is more than one filter term and should be placed from the 2nd filter term onwards describing how it connects to the previous term"
|
|
260
|
-
}
|
|
261
|
-
},
|
|
262
|
-
required: ["term", "category"],
|
|
263
|
-
additionalProperties: false
|
|
264
|
-
},
|
|
265
|
-
NumericFilterTerm: {
|
|
266
|
-
type: "object",
|
|
267
|
-
properties: {
|
|
268
|
-
term: { type: "string", description: "Name of numeric term" },
|
|
269
|
-
start: { type: "number", description: "start position (or lower limit) of numeric term" },
|
|
270
|
-
stop: { type: "number", description: "stop position (or upper limit) of numeric term" },
|
|
271
|
-
join: {
|
|
272
|
-
type: "string",
|
|
273
|
-
enum: ["and", "or"],
|
|
274
|
-
description: "join term to be used only when there is more than one filter term and should be placed from the 2nd filter term onwards describing how it connects to the previous term"
|
|
275
|
-
}
|
|
276
|
-
},
|
|
277
|
-
required: ["term"],
|
|
278
|
-
additionalProperties: false
|
|
279
|
-
}
|
|
357
|
+
...FILTER_TERM_DEFINITIONS
|
|
280
358
|
}
|
|
281
359
|
};
|
|
282
|
-
const DE_ds = dataset_json.charts.
|
|
283
|
-
if (DE_ds
|
|
284
|
-
if (DE_ds
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
}
|
|
291
|
-
const system_prompt = "I am an assistant that extracts the groups from the user prompt to carry out differential gene expression. The final output must be in the following JSON with NO extra comments. The schema is as follows: " + JSON.stringify(Schema) + ' . "group1" and "group2" fields are compulsory. Both "group1" and "group2" consist of an array of filter variables. There are two kinds of filter variables: "Categorical" and "Numeric". "Categorical" variables are those variables which can have a fixed set of values e.g. gender, race. They are defined by the "CategoricalFilterTerm" which consists of "term" (a field from the sqlite3 db) and "category" (a value of the field from the sqlite db). "Numeric" variables are those which can have any numeric value. They are defined by "NumericFilterTerm" and contain the subfields "term" (a field from the sqlite3 db), "start" an optional filter which is defined when a lower cutoff is defined in the user input for the numeric variable and "stop" an optional filter which is defined when a higher cutoff is defined in the user input for the numeric variable. ' + // May consider deprecating this natural language description after units tests are implemented
|
|
292
|
-
checkField(dataset_json.DatasetPrompt) + checkField(DE_ds[0].SystemPrompt) + "The sqlite db in plain language is as follows:\n" + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data + " Question: {" + prompt + "} answer:";
|
|
293
|
-
let response;
|
|
294
|
-
if (llm_backend_type == "SJ") {
|
|
295
|
-
response = await call_sj_llm(system_prompt, comp_model_name, apilink);
|
|
296
|
-
} else if (llm_backend_type == "ollama") {
|
|
297
|
-
response = await call_ollama(system_prompt, comp_model_name, apilink);
|
|
360
|
+
const DE_ds = dataset_json.charts.find((chart) => chart.type == "DE");
|
|
361
|
+
if (!DE_ds) throw "DE information is not present in the dataset file.";
|
|
362
|
+
if (DE_ds.TrainingData.length == 0) throw "No training data is provided for the DE agent.";
|
|
363
|
+
const training_data = formatTrainingExamples(DE_ds.TrainingData);
|
|
364
|
+
const system_prompt = "I am an assistant that extracts the groups from the user prompt to carry out differential gene expression. The final output must be in the following JSON with NO extra comments. The schema is as follows: " + JSON.stringify(Schema) + ' . "group1" and "group2" fields are compulsory. Both "group1" and "group2" consist of an array of filter variables. ' + FILTER_DESCRIPTION + checkField(dataset_json.DatasetPrompt) + checkField(DE_ds.SystemPrompt) + "The sqlite db in plain language is as follows:\n" + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data + " Question: {" + prompt + "} answer:";
|
|
365
|
+
const response = await route_to_appropriate_llm_provider(system_prompt, llm);
|
|
366
|
+
if (testing) {
|
|
367
|
+
return { action: "dge", response: JSON.parse(response) };
|
|
298
368
|
} else {
|
|
299
|
-
|
|
369
|
+
return await validate_DE_response(response, ds, dataset_db_output.db_rows);
|
|
300
370
|
}
|
|
301
|
-
return await validate_DE_response(response, ds, dataset_db_output.db_rows);
|
|
302
371
|
} else {
|
|
303
372
|
return { type: "html", html: "Differential gene expression not supported for this dataset" };
|
|
304
373
|
}
|
|
@@ -447,9 +516,7 @@ function find_label(filter, db_rows) {
|
|
|
447
516
|
}
|
|
448
517
|
return label;
|
|
449
518
|
}
|
|
450
|
-
async function extract_summary_terms(prompt,
|
|
451
|
-
const dataset_db_output = await parse_dataset_db(dataset_db);
|
|
452
|
-
const genes_list = await parse_geneset_db(genedb);
|
|
519
|
+
async function extract_summary_terms(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
|
|
453
520
|
const Schema = {
|
|
454
521
|
$schema: "http://json-schema.org/draft-07/schema#",
|
|
455
522
|
$ref: "#/definitions/SummaryType",
|
|
@@ -463,73 +530,37 @@ async function extract_summary_terms(prompt, llm_backend_type, comp_model_name,
|
|
|
463
530
|
type: "array",
|
|
464
531
|
items: { $ref: "#/definitions/FilterTerm" },
|
|
465
532
|
description: "Optional simple filter terms"
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
required: ["term", "simpleFilter"],
|
|
469
|
-
additionalProperties: false
|
|
470
|
-
},
|
|
471
|
-
FilterTerm: {
|
|
472
|
-
anyOf: [{ $ref: "#/definitions/CategoricalFilterTerm" }, { $ref: "#/definitions/NumericFilterTerm" }]
|
|
473
|
-
},
|
|
474
|
-
CategoricalFilterTerm: {
|
|
475
|
-
type: "object",
|
|
476
|
-
properties: {
|
|
477
|
-
term: { type: "string", description: "Name of categorical term" },
|
|
478
|
-
category: { type: "string", description: "The category of the term" },
|
|
479
|
-
join: {
|
|
533
|
+
},
|
|
534
|
+
childType: {
|
|
480
535
|
type: "string",
|
|
481
|
-
enum: ["
|
|
482
|
-
description: "
|
|
536
|
+
enum: ["violin", "boxplot", "sampleScatter", "barchart"],
|
|
537
|
+
description: "Optional explicit child type requested by the user. If omitted, the logic of the data types picks the child type."
|
|
483
538
|
}
|
|
484
539
|
},
|
|
485
|
-
required: ["term", "
|
|
540
|
+
required: ["term", "simpleFilter"],
|
|
486
541
|
additionalProperties: false
|
|
487
542
|
},
|
|
488
|
-
|
|
489
|
-
type: "object",
|
|
490
|
-
properties: {
|
|
491
|
-
term: { type: "string", description: "Name of numeric term" },
|
|
492
|
-
start: { type: "number", description: "start position (or lower limit) of numeric term" },
|
|
493
|
-
stop: { type: "number", description: "stop position (or upper limit) of numeric term" },
|
|
494
|
-
join: {
|
|
495
|
-
type: "string",
|
|
496
|
-
enum: ["and", "or"],
|
|
497
|
-
description: "join term to be used only when there there is more than one filter term and should be placed in the 2nd filter term describing how it connects to the 1st term"
|
|
498
|
-
}
|
|
499
|
-
},
|
|
500
|
-
required: ["term"],
|
|
501
|
-
additionalProperties: false
|
|
502
|
-
}
|
|
543
|
+
...FILTER_TERM_DEFINITIONS
|
|
503
544
|
}
|
|
504
545
|
};
|
|
505
|
-
const
|
|
506
|
-
const
|
|
507
|
-
|
|
508
|
-
if (summary_ds.length == 0) throw "
|
|
509
|
-
|
|
510
|
-
let
|
|
511
|
-
let training_data = "";
|
|
512
|
-
for (const train_data of summary_ds[0].TrainingData) {
|
|
513
|
-
train_iter += 1;
|
|
514
|
-
training_data += "Example question" + train_iter.toString() + ": " + train_data.question + " Example answer" + train_iter.toString() + ":" + JSON.stringify(train_data.answer) + " ";
|
|
515
|
-
}
|
|
516
|
-
let system_prompt = "I am an assistant that extracts the summary terms from user query. The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " + JSON.stringify(Schema) + ' term and term2 (if present) should ONLY contain names of the fields from the sqlite db. The "simpleFilter" field is optional and should contain an array of JSON terms with which the dataset will be filtered. A variable simultaneously CANNOT be part of both "term"/"term2" and "simpleFilter". There are two kinds of filter variables: "Categorical" and "Numeric". "Categorical" variables are those variables which can have a fixed set of values e.g. gender, race. They are defined by the "CategoricalFilterTerm" which consists of "term" (a field from the sqlite3 db) and "category" (a value of the field from the sqlite db). "Numeric" variables are those which can have any numeric value. They are defined by "NumericFilterTerm" and contain the subfields "term" (a field from the sqlite3 db), "start" an optional filter which is defined when a lower cutoff is defined in the user input for the numeric variable and "stop" an optional filter which is defined when a higher cutoff is defined in the user input for the numeric variable. ' + // May consider deprecating this natural language description after unit tests are implemented
|
|
517
|
-
checkField(dataset_json.DatasetPrompt) + checkField(summary_ds[0].SystemPrompt) + "\n The DB content is as follows: " + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data;
|
|
546
|
+
const common_genes = extractGenesFromPrompt(prompt, genes_list);
|
|
547
|
+
const summary_ds = dataset_json.charts.find((chart) => chart.type == "Summary");
|
|
548
|
+
if (!summary_ds) throw "Summary information is not present in the dataset file.";
|
|
549
|
+
if (summary_ds.TrainingData.length == 0) throw "No training data is provided for the summary agent.";
|
|
550
|
+
const training_data = formatTrainingExamples(summary_ds.TrainingData);
|
|
551
|
+
let system_prompt = "I am an assistant that extracts the summary terms from user query. The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " + JSON.stringify(Schema) + ' term and term2 (if present) should ONLY contain names of the fields from the sqlite db. The "simpleFilter" field is optional and should contain an array of JSON terms with which the dataset will be filtered. ' + FILTER_DESCRIPTION + checkField(dataset_json.DatasetPrompt) + checkField(summary_ds.SystemPrompt) + "\n The DB content is as follows: " + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data;
|
|
518
552
|
if (dataset_json.hasGeneExpression) {
|
|
519
553
|
if (common_genes.length > 0) {
|
|
520
554
|
system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
|
|
521
555
|
}
|
|
522
556
|
}
|
|
523
557
|
system_prompt += " Question: {" + prompt + "} answer:";
|
|
524
|
-
|
|
525
|
-
if (
|
|
526
|
-
|
|
527
|
-
} else if (llm_backend_type == "ollama") {
|
|
528
|
-
response = await call_ollama(system_prompt, comp_model_name, apilink);
|
|
558
|
+
const response = await route_to_appropriate_llm_provider(system_prompt, llm);
|
|
559
|
+
if (testing) {
|
|
560
|
+
return { action: "summary", response: JSON.parse(response) };
|
|
529
561
|
} else {
|
|
530
|
-
|
|
562
|
+
return validate_summary_response(response, common_genes, dataset_json, ds);
|
|
531
563
|
}
|
|
532
|
-
return validate_summary_response(response, common_genes, dataset_json, ds);
|
|
533
564
|
}
|
|
534
565
|
function validate_summary_response(response, common_genes, dataset_json, ds) {
|
|
535
566
|
const response_type = JSON.parse(response);
|
|
@@ -542,6 +573,10 @@ function validate_summary_response(response, common_genes, dataset_json, ds) {
|
|
|
542
573
|
html += term1_validation.html;
|
|
543
574
|
} else {
|
|
544
575
|
pp_plot_json.term = term1_validation.term_type;
|
|
576
|
+
if (term1_validation.category == "float" || term1_validation.category == "integer") {
|
|
577
|
+
pp_plot_json.term.q = { mode: "continuous" };
|
|
578
|
+
}
|
|
579
|
+
pp_plot_json.category = term1_validation.category;
|
|
545
580
|
}
|
|
546
581
|
if (response_type.term2) {
|
|
547
582
|
const term2_validation = validate_term(response_type.term2, common_genes, dataset_json, ds);
|
|
@@ -549,8 +584,233 @@ function validate_summary_response(response, common_genes, dataset_json, ds) {
|
|
|
549
584
|
html += term2_validation.html;
|
|
550
585
|
} else {
|
|
551
586
|
pp_plot_json.term2 = term2_validation.term_type;
|
|
587
|
+
if (term2_validation.category == "float" || term2_validation.category == "integer") {
|
|
588
|
+
pp_plot_json.term2.q = { mode: "continuous" };
|
|
589
|
+
}
|
|
590
|
+
pp_plot_json.category2 = term2_validation.category;
|
|
591
|
+
}
|
|
592
|
+
}
|
|
593
|
+
const llmChildType = response_type.childType && ["violin", "boxplot", "sampleScatter", "barchart"].includes(response_type.childType) ? response_type.childType : void 0;
|
|
594
|
+
const resolved = resolveChildType(pp_plot_json.category, pp_plot_json.category2, llmChildType);
|
|
595
|
+
if (resolved.error) {
|
|
596
|
+
html += resolved.error;
|
|
597
|
+
} else {
|
|
598
|
+
pp_plot_json.childType = resolved.childType;
|
|
599
|
+
if (resolved.bothNumeric && (resolved.childType == "violin" || resolved.childType == "boxplot")) {
|
|
600
|
+
pp_plot_json.term2.q = { mode: "discrete" };
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
delete pp_plot_json.category;
|
|
604
|
+
if (pp_plot_json.category2) delete pp_plot_json.category2;
|
|
605
|
+
if (response_type.simpleFilter && response_type.simpleFilter.length > 0) {
|
|
606
|
+
const validated_filters = validate_filter(response_type.simpleFilter, ds, "");
|
|
607
|
+
if (validated_filters.html.length > 0) {
|
|
608
|
+
html += validated_filters.html;
|
|
609
|
+
} else {
|
|
610
|
+
pp_plot_json.filter = validated_filters.simplefilter;
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
if (html.length > 0) {
|
|
614
|
+
return { type: "html", html };
|
|
615
|
+
} else {
|
|
616
|
+
return { type: "plot", plot: pp_plot_json };
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
/**
 * Ask the LLM to extract the dictionary terms, gene names and optional sample
 * filter needed to build a matrix plot from a free-text user prompt.
 *
 * @param {string} prompt - User question; embedded verbatim at the end of the system prompt.
 * @param {*} llm - Passed through unchanged to route_to_appropriate_llm_provider().
 * @param {object} dataset_db_output - Must carry `rag_docs`, an array that is joined into the system prompt.
 * @param {object} dataset_json - Dataset agent config; reads `charts`, `DatasetPrompt` and `hasGeneExpression`.
 * @param {*} genes_list - Handed to extractGenesFromPrompt() together with the prompt.
 * @param {object} ds - Dataset object, forwarded to validate_matrix_response().
 * @param {boolean} testing - When truthy, the parsed LLM answer is returned without validation.
 * @returns {Promise<object>} `{ action: "matrix", response }` in testing mode, otherwise
 *   whatever validate_matrix_response() returns.
 * @throws {string} When the dataset config has no chart of type "Matrix", or that chart
 *   has no training data.
 */
async function extract_matrix_search_terms_from_query(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
  // Validate the dataset configuration first so a misconfigured dataset fails
  // with the intended message before any prompt is assembled or the LLM is called.
  const matrix_ds = (dataset_json.charts || []).find((chart) => chart.type === "Matrix");
  if (!matrix_ds) throw "Matrix information is not present in the dataset file.";
  if (!matrix_ds.TrainingData || matrix_ds.TrainingData.length == 0) {
    throw "No training data is provided for the matrix agent.";
  }

  const Schema = {
    $schema: "http://json-schema.org/draft-07/schema#",
    $ref: "#/definitions/MatrixType",
    definitions: {
      MatrixType: {
        type: "object",
        properties: {
          terms: {
            type: "array",
            items: { type: "string" },
            description: "Names of dictionary/clinical terms to include as rows in the matrix"
          },
          geneNames: {
            type: "array",
            items: { type: "string" },
            description: "Names of genes to include as gene variant rows in the matrix"
          },
          simpleFilter: {
            type: "array",
            items: { $ref: "#/definitions/FilterTerm" },
            description: "Optional simple filter terms to restrict the sample set"
          }
        },
        additionalProperties: false
      },
      ...FILTER_TERM_DEFINITIONS
    }
  };

  const common_genes = extractGenesFromPrompt(prompt, genes_list);
  const training_data = formatTrainingExamples(matrix_ds.TrainingData);

  let system_prompt =
    "I am an assistant that extracts terms and gene names from the user query to create a matrix plot. A matrix plot displays multiple genes and/or clinical variables across samples in a grid layout. The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " +
    JSON.stringify(Schema) +
    ' The "terms" field should ONLY contain names of clinical/dictionary fields from the sqlite db. The "geneNames" field should ONLY contain gene names. At least one of "terms" or "geneNames" must be provided. The "simpleFilter" field is optional and should contain an array of JSON terms with which the dataset will be filtered. ' +
    FILTER_DESCRIPTION +
    checkField(dataset_json.DatasetPrompt) +
    checkField(matrix_ds.SystemPrompt) +
    "\n The DB content is as follows: " +
    dataset_db_output.rag_docs.join(",") +
    " training data is as follows:" +
    training_data;

  // The gene list is only appended when the dataset declares gene expression support.
  if (dataset_json.hasGeneExpression && common_genes.length > 0) {
    system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
  }
  system_prompt += " Question: {" + prompt + "} answer:";

  const response = await route_to_appropriate_llm_provider(system_prompt, llm);
  if (testing) {
    // Testing mode exposes the raw (parsed) LLM answer without validation.
    return { action: "matrix", response: JSON.parse(response) };
  }
  return validate_matrix_response(response, common_genes, dataset_json, ds);
}
|
|
667
|
+
/**
 * Validate the LLM answer for a matrix request and convert it into a plot config.
 *
 * Every problem found is appended to one html message; a plot is only returned
 * when that message stays empty.
 *
 * @param {string} response - JSON text produced by the LLM.
 * @param {string[]} common_genes - Accepted gene names; entries are compared against the
 *   lower-cased gene names from the response.
 * @param {object} dataset_json - Dataset agent config; `hasGeneExpression` selects between
 *   "geneExpression" and "geneVariant" term wrappers for genes.
 * @param {object} ds - Dataset object; `ds.cohort.termdb.q.termjsonByOneid()` resolves term ids.
 * @returns {{type: "html", html: string} | {type: "plot", plot: object}} An error/info
 *   message, or a matrix plot config whose single term group holds all rows.
 * @throws {SyntaxError} When `response` is not valid JSON.
 */
function validate_matrix_response(response, common_genes, dataset_json, ds) {
  const response_type = JSON.parse(response);
  const pp_plot_json = { chartType: "matrix" };
  let html = "";
  if (response_type.html) html = response_type.html;

  // Anything that is not an array (missing field, bare string, object ...) counts as
  // "nothing provided"; otherwise a malformed answer would yield an empty matrix.
  const terms = Array.isArray(response_type.terms) ? response_type.terms : [];
  const geneNames = Array.isArray(response_type.geneNames) ? response_type.geneNames : [];
  if (terms.length == 0 && geneNames.length == 0) {
    html += "At least one clinical term or gene name is required for a matrix plot";
  }

  const twLst = [];
  for (const t of terms) {
    const term = ds.cohort.termdb.q.termjsonByOneid(t);
    if (!term) {
      html += "invalid term id:" + t + " ";
    } else {
      twLst.push({ id: term.id });
    }
  }

  for (const g of geneNames) {
    // Non-string entries cannot be gene names; report them without crashing on toLowerCase().
    if (typeof g !== "string" || !common_genes.includes(g.toLowerCase())) {
      html += "invalid gene name:" + g + " ";
      continue;
    }
    const geneName = g.toUpperCase();
    if (dataset_json.hasGeneExpression) {
      twLst.push({ term: { gene: geneName, type: "geneExpression" } });
    } else {
      twLst.push({ term: { gene: geneName, name: geneName, type: "geneVariant" } });
    }
  }

  if (response_type.simpleFilter && response_type.simpleFilter.length > 0) {
    const validated_filters = validate_filter(response_type.simpleFilter, ds, "");
    if (validated_filters.html.length > 0) {
      html += validated_filters.html;
    } else {
      pp_plot_json.filter = validated_filters.simplefilter;
    }
  }

  if (html.length > 0) {
    return { type: "html", html };
  }
  pp_plot_json.termgroups = [{ name: "", lst: twLst }];
  return { type: "plot", plot: pp_plot_json };
}
|
|
716
|
+
/**
 * Ask the LLM which pre-built scatter plot (t-SNE/UMAP) to show and which
 * overlays (color, shape, divide-by) and sample filter to apply, based on a
 * free-text user prompt.
 *
 * @param {string} prompt - User question; embedded verbatim at the end of the system prompt.
 * @param {*} llm - Passed through unchanged to route_to_appropriate_llm_provider().
 * @param {object} dataset_db_output - Must carry `rag_docs`, an array that is joined into the system prompt.
 * @param {object} dataset_json - Dataset agent config; reads `prebuiltPlots`, `charts`,
 *   `DatasetPrompt` and `hasGeneExpression`.
 * @param {*} genes_list - Handed to extractGenesFromPrompt() together with the prompt.
 * @param {object} ds - Dataset object, forwarded to validate_samplescatter_response().
 * @param {boolean} testing - When truthy, the parsed LLM answer is returned without validation.
 * @returns {Promise<object>} An html message when the dataset has no pre-built plots,
 *   `{ action: "sampleScatter", response }` in testing mode, otherwise whatever
 *   validate_samplescatter_response() returns.
 * @throws {string} When the dataset config has no chart of type "sampleScatter", or that
 *   chart has no training data.
 */
async function extract_samplescatter_terms_from_query(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
  if (!dataset_json.prebuiltPlots || dataset_json.prebuiltPlots.length == 0) {
    return { type: "html", html: "No pre-built scatter plots (t-SNE/UMAP) are available for this dataset" };
  }

  // Validate the agent configuration before assembling the schema/prompt so a
  // misconfigured dataset fails with the intended message rather than a TypeError.
  const scatter_ds = (dataset_json.charts || []).find((chart) => chart.type === "sampleScatter");
  if (!scatter_ds) throw "sampleScatter information is not present in the dataset file.";
  if (!scatter_ds.TrainingData || scatter_ds.TrainingData.length == 0) {
    throw "No training data is provided for the sampleScatter agent.";
  }

  const Schema = {
    $schema: "http://json-schema.org/draft-07/schema#",
    $ref: "#/definitions/SampleScatterType",
    definitions: {
      SampleScatterType: {
        type: "object",
        properties: {
          plotName: {
            type: "string",
            description: "Name of the pre-built scatter plot to display"
          },
          colorTW: {
            type: ["string", "null"],
            description: "Term name or gene name to overlay as color on the scatter plot. Set to null to remove the color overlay."
          },
          shapeTW: {
            type: ["string", "null"],
            description: "Term name or gene name to overlay as shape on the scatter plot. Set to null to remove the shape overlay."
          },
          term0: {
            type: ["string", "null"],
            description: "Term name to use for Z/Divide which splits the plot into panels. Set to null to remove the divide overlay."
          },
          simpleFilter: {
            type: "array",
            items: { $ref: "#/definitions/FilterTerm" },
            description: "Optional simple filter terms to restrict the sample set"
          }
        },
        required: ["plotName"],
        additionalProperties: false
      },
      ...FILTER_TERM_DEFINITIONS
    }
  };

  const common_genes = extractGenesFromPrompt(prompt, genes_list);
  const training_data = formatTrainingExamples(scatter_ds.TrainingData);
  const plotNames = dataset_json.prebuiltPlots.map((p) => p.name).join(", ");

  let system_prompt =
    "I am an assistant that extracts overlay parameters for pre-built scatter plots (t-SNE/UMAP). The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " +
    JSON.stringify(Schema) +
    " The available pre-built plots are: " +
    plotNames +
    '. The "plotName" field must match one of these exactly. The "colorTW", "shapeTW", and "term0" fields should contain names of clinical fields from the sqlite db OR gene names. To remove an overlay, set the corresponding field to null explicitly. If the user does not mention a particular overlay, do NOT include that field in the output (omit it entirely). Only include "colorTW", "shapeTW", or "term0" if the user explicitly mentions coloring, shaping, or dividing. ' +
    FILTER_DESCRIPTION +
    checkField(dataset_json.DatasetPrompt) +
    checkField(scatter_ds.SystemPrompt) +
    "\n The DB content is as follows: " +
    dataset_db_output.rag_docs.join(",") +
    " training data is as follows:" +
    training_data;

  // The gene list is only appended when the dataset declares gene expression support.
  if (dataset_json.hasGeneExpression && common_genes.length > 0) {
    system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
  }
  system_prompt += " Question: {" + prompt + "} answer:";

  const response = await route_to_appropriate_llm_provider(system_prompt, llm);
  if (testing) {
    // Testing mode exposes the raw (parsed) LLM answer without validation.
    return { action: "sampleScatter", response: JSON.parse(response) };
  }
  return validate_samplescatter_response(response, common_genes, dataset_json, ds);
}
|
|
773
|
+
function validate_samplescatter_response(response, common_genes, dataset_json, ds) {
|
|
774
|
+
const response_type = JSON.parse(response);
|
|
775
|
+
let html = "";
|
|
776
|
+
if (response_type.html) html = response_type.html;
|
|
777
|
+
if (!response_type.plotName) {
|
|
778
|
+
html += "plotName is required for sample scatter output";
|
|
779
|
+
} else {
|
|
780
|
+
const matchedPlot = dataset_json.prebuiltPlots.find(
|
|
781
|
+
(p) => p.name.toLowerCase() == response_type.plotName.toLowerCase()
|
|
782
|
+
);
|
|
783
|
+
if (!matchedPlot) {
|
|
784
|
+
const availablePlots = dataset_json.prebuiltPlots.map((p) => p.name).join(", ");
|
|
785
|
+
html += "Unknown plot name: " + response_type.plotName + ". Available plots are: " + availablePlots;
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
const pp_plot_json = {
|
|
789
|
+
chartType: "sampleScatter",
|
|
790
|
+
name: response_type.plotName
|
|
791
|
+
};
|
|
792
|
+
const validateOverlayTerm = (termName, fieldKey) => {
|
|
793
|
+
if (termName === null) {
|
|
794
|
+
pp_plot_json[fieldKey] = null;
|
|
795
|
+
return;
|
|
796
|
+
}
|
|
797
|
+
if (termName === void 0) {
|
|
798
|
+
return;
|
|
799
|
+
}
|
|
800
|
+
const termValidation = validate_term(termName, common_genes, dataset_json, ds);
|
|
801
|
+
if (termValidation.html.length > 0) {
|
|
802
|
+
html += termValidation.html;
|
|
803
|
+
} else {
|
|
804
|
+
const tw = { ...termValidation.term_type };
|
|
805
|
+
if (termValidation.category == "float" || termValidation.category == "integer") {
|
|
806
|
+
tw.q = { mode: "continuous" };
|
|
807
|
+
}
|
|
808
|
+
pp_plot_json[fieldKey] = tw;
|
|
809
|
+
}
|
|
810
|
+
};
|
|
811
|
+
validateOverlayTerm(response_type.colorTW, "colorTW");
|
|
812
|
+
validateOverlayTerm(response_type.shapeTW, "shapeTW");
|
|
813
|
+
validateOverlayTerm(response_type.term0, "term0");
|
|
554
814
|
if (response_type.simpleFilter && response_type.simpleFilter.length > 0) {
|
|
555
815
|
const validated_filters = validate_filter(response_type.simpleFilter, ds, "");
|
|
556
816
|
if (validated_filters.html.length > 0) {
|
|
@@ -568,6 +828,7 @@ function validate_summary_response(response, common_genes, dataset_json, ds) {
|
|
|
568
828
|
function validate_term(response_term, common_genes, dataset_json, ds) {
|
|
569
829
|
let html = "";
|
|
570
830
|
let term_type;
|
|
831
|
+
let category = "";
|
|
571
832
|
const term = ds.cohort.termdb.q.termjsonByOneid(response_term);
|
|
572
833
|
if (!term) {
|
|
573
834
|
const gene_hits = common_genes.filter((gene) => gene == response_term.toLowerCase());
|
|
@@ -576,14 +837,16 @@ function validate_term(response_term, common_genes, dataset_json, ds) {
|
|
|
576
837
|
} else {
|
|
577
838
|
if (dataset_json.hasGeneExpression) {
|
|
578
839
|
term_type = { term: { gene: response_term.toUpperCase(), type: "geneExpression" } };
|
|
840
|
+
category = "float";
|
|
579
841
|
} else {
|
|
580
842
|
html += "Dataset does not support gene expression";
|
|
581
843
|
}
|
|
582
844
|
}
|
|
583
845
|
} else {
|
|
584
846
|
term_type = { id: term.id };
|
|
847
|
+
category = term.type;
|
|
585
848
|
}
|
|
586
|
-
return { term_type, html };
|
|
849
|
+
return { term_type, html, category };
|
|
587
850
|
}
|
|
588
851
|
function countOccurrences(str, word) {
|
|
589
852
|
if (word === "") return 0;
|
|
@@ -768,5 +1031,7 @@ function parse_db_rows(db_row) {
|
|
|
768
1031
|
return output_string;
|
|
769
1032
|
}
|
|
770
1033
|
export {
|
|
771
|
-
api
|
|
1034
|
+
api,
|
|
1035
|
+
readJSONFile,
|
|
1036
|
+
run_chat_pipeline
|
|
772
1037
|
};
|