@sjcrh/proteinpaint-server 2.190.2-0 → 2.191.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,176 +0,0 @@
1
- import { mayLog } from "#src/helpers.ts";
2
- import serverconfig from "../src/serverconfig.js";
3
- import { run_R } from "@sjcrh/proteinpaint-r";
4
- import { run_rust } from "@sjcrh/proteinpaint-rust";
5
- import { formatElapsedTime } from "#shared";
6
- import { renderVolcano } from "../src/renderVolcano.ts";
7
- import { cacheOrRecompute } from "#src/utils/cacheOrRecompute.ts";
8
- import {
9
- buildGroupValues,
10
- canonicalizeSamplelst,
11
- resolveDaContext
12
- } from "#src/utils/sampleGroups.ts";
13
/**
 * Create the request handler for the differential-expression route.
 *
 * When `req.query.preAnalysis` is truthy the handler only reports how many
 * samples each of the two groups resolves to (keyed by the group names, plus
 * an `alert` entry when the groups produced alerts). Otherwise it loads the DE
 * result, renders the volcano payload and responds with it together with the
 * sample sizes, the method and any diagnostic images.
 *
 * Failures are answered with `e.status` (default 500) and a
 * `{ status, error, code }` body; Error instances are also logged.
 *
 * @param {{ genomes: object }} arg - genomes object forwarded to the DE helpers
 * @returns {(req: object, res: object) => Promise<void>} route handler
 */
function init({ genomes }) {
  // Pre-analysis branch: respond with the resolved group sizes only.
  const sendGroupSizes = async (query, res) => {
    const { ds, term_results, term_results2 } = await resolveDaContext(query, genomes);
    const resolved = resolveSampleGroups(query, ds, term_results, term_results2);
    const firstName = query.samplelst.groups[0].name;
    const secondName = query.samplelst.groups[1].name;
    const alertPart = resolved.alerts.length ? { alert: resolved.alerts.join(" | ") } : {};
    res.send({
      data: {
        [firstName]: resolved.group1names.length,
        [secondName]: resolved.group2names.length,
        ...alertPart
      }
    });
  };

  // Full branch: load the DE result and render the volcano payload.
  const sendDeResult = async (query, res) => {
    const { result, cacheId, images } = await loadDeForResponse(query, genomes);
    const data = await renderVolcano(result.geneRows, query.volcanoRender);
    data.cacheId = cacheId;
    const payload = {
      data,
      sample_size1: result.sample_size1,
      sample_size2: result.sample_size2,
      method: result.method
    };
    if (images.length) payload.images = images;
    if (result.bcv != null) payload.bcv = result.bcv;
    res.send(payload);
  };

  return async (req, res) => {
    try {
      const q = req.query;
      if (q.preAnalysis) {
        await sendGroupSizes(q, res);
      } else {
        await sendDeResult(q, res);
      }
    } catch (e) {
      res.status(e.status || 500).send({ status: "error", error: e.message || e, code: e.code });
      if (e instanceof Error && e.stack) console.log(e);
    }
  };
}
49
/**
 * Select the request fields that are handed to `cacheOrRecompute` as the
 * `computeArgument` of a DE run.
 *
 * The sample list goes through `canonicalizeSamplelst`; `method`, `tw`, `tw2`,
 * `filter` and `filter0` are replaced by `null` when nullish, the count
 * thresholds are copied as-is.
 *
 * @param {object} req - DE request parameters
 * @returns {object} the key inputs, always in the same property order
 */
function deKeyInputs(req) {
  const orNull = (value) => value ?? null;
  const { genome, dslabel, min_count, min_total_count, cpm_cutoff } = req;
  return {
    genome,
    dslabel,
    samplelst: canonicalizeSamplelst(req.samplelst),
    min_count,
    min_total_count,
    cpm_cutoff,
    method: orNull(req.method),
    tw: orNull(req.tw),
    tw2: orNull(req.tw2),
    filter: orNull(req.filter),
    filter0: orNull(req.filter0)
  };
}
64
/**
 * Fetch the DE result for a request and collect the images it carries.
 *
 * @param {object} req - DE request parameters
 * @param {object} genomes - genomes object forwarded to `getDeCacheResult`
 * @returns {Promise<{ result: object, cacheId: *, images: object[] }>}
 *   `images` holds `result.qlImage` and `result.mdsImage`, in that order,
 *   skipping whichever is absent
 */
async function loadDeForResponse(req, genomes) {
  const cached = await getDeCacheResult(req, genomes);
  const { result, cacheId } = cached;
  const images = [result.qlImage, result.mdsImage].filter((image) => image);
  return { result, cacheId, images };
}
71
/**
 * Obtain the DE result for a request through `cacheOrRecompute`, using the
 * "de" cache sub-directory and `deKeyInputs(req)` as the compute argument.
 *
 * The fresh computation resolves the analysis context first and then runs
 * `runDeFresh`.
 *
 * @param {object} req - DE request parameters
 * @param {object} genomes - genomes object forwarded to `resolveDaContext`
 * @returns {Promise<{ result: object, cacheId: * }>}
 */
async function getDeCacheResult(req, genomes) {
  const computeFresh = async () => {
    const context = await resolveDaContext(req, genomes);
    return runDeFresh(req, context.ds, context.term_results, context.term_results2);
  };
  const cached = await cacheOrRecompute({
    computeArgument: deKeyInputs(req),
    cacheSubdir: "de",
    computeFresh
  });
  return { result: cached.result, cacheId: cached.cacheId };
}
82
/**
 * Run a differential-expression analysis without consulting the cache.
 *
 * Group 1 is passed to the analysis as the control set and group 2 as the
 * case set. The R script `edge_newh5.R` is used when both groups have at most
 * 8 samples or when `param.method` is "edgeR" or "limma"; in every other case
 * the rust "DEanalysis" pipeline runs.
 *
 * Side effect: `param.method` is overwritten with "edgeR" or "wilcoxon" once
 * the corresponding run has returned.
 *
 * @param {object} param - DE request parameters (samplelst, method, tw, tw2, thresholds)
 * @param {object} ds - dataset; `ds.queries.rnaseqGeneCount` supplies file and storage type
 * @param {*} term_results - forwarded to `resolveSampleGroups`
 * @param {*} term_results2 - forwarded to `resolveSampleGroups`
 * @returns {Promise<object>} `{ geneRows, sample_size1, sample_size2, method }`
 *   plus `bcv`, `qlImage`, `mdsImage` on the R path
 * @throws {Error} when the groups produced alerts or a confounder has a single value
 */
async function runDeFresh(param, ds, term_results, term_results2) {
  const groups = resolveSampleGroups(param, ds, term_results, term_results2);
  if (groups.alerts.length > 0) {
    throw new Error(groups.alerts.join(" | "));
  }

  const controls = groups.group1names;
  const cases = groups.group2names;
  const countQuery = ds.queries.rnaseqGeneCount;
  const expression_input = {
    case: cases.join(","),
    control: controls.join(","),
    data_type: "do_DE",
    input_file: countQuery.file,
    cachedir: serverconfig.cachedir,
    storage_type: countQuery.storage_type,
    DE_method: param.method,
    mds_cutoff: 1e4,
    min_count: param.min_count,
    min_total_count: param.min_total_count,
    cpm_cutoff: param.cpm_cutoff
  };

  // Confounder values are listed cases first, then controls.
  const hasSingleValue = (values) => new Set(values).size === 1;
  if (param.tw) {
    expression_input.conf1 = [...groups.conf1_group2, ...groups.conf1_group1];
    expression_input.conf1_mode = param.tw.q.mode;
    if (hasSingleValue(expression_input.conf1)) {
      throw new Error("Confounding variable 1 has only one value");
    }
  }
  if (param.tw2) {
    expression_input.conf2 = [...groups.conf2_group2, ...groups.conf2_group1];
    expression_input.conf2_mode = param.tw2.q.mode;
    if (hasSingleValue(expression_input.conf2)) {
      throw new Error("Confounding variable 2 has only one value");
    }
  }

  const bothGroupsSmall = controls.length <= 8 && cases.length <= 8;
  const useR = bothGroupsSmall || param.method === "edgeR" || param.method === "limma";

  if (!useR) {
    const rustStart = Date.now();
    const geneRows = JSON.parse(await run_rust("DEanalysis", JSON.stringify(expression_input)));
    mayLog("Time taken to run rust DE pipeline:", formatElapsedTime(Date.now() - rustStart));
    param.method = "wilcoxon";
    return {
      geneRows,
      sample_size1: controls.length,
      sample_size2: cases.length,
      method: param.method
    };
  }

  const rStart = Date.now();
  const rOutput = JSON.parse(await run_R("edge_newh5.R", JSON.stringify(expression_input)));
  mayLog("Time taken to run edgeR:", formatElapsedTime(Date.now() - rStart));
  param.method = "edgeR";
  const qlImage = deImageFromB64(rOutput.ql_image_b64, "ql_image");
  const mdsImage = deImageFromB64(rOutput.mds_image_b64, "mds_image");
  const rResult = {
    geneRows: rOutput.gene_data,
    sample_size1: rOutput.num_controls[0],
    sample_size2: rOutput.num_cases[0],
    method: param.method,
    bcv: rOutput.bcv && rOutput.bcv[0] != null ? rOutput.bcv[0] : void 0
  };
  if (qlImage) rResult.qlImage = qlImage;
  if (mdsImage) rResult.mdsImage = mdsImage;
  return rResult;
}
141
/**
 * Validate the two-group sample list of a DE request and resolve each group
 * to its sample names and confounder values via `buildGroupValues`.
 *
 * Problems with the resolved groups (an empty group, samples present in both
 * groups) are not thrown; they are reported through the returned `alerts`.
 *
 * @param {object} param - request parameters; reads `samplelst.groups`, `tw`, `tw2`
 * @param {object} ds - dataset; must provide `queries.rnaseqGeneCount` with `file` and `storage_type`
 * @param {*} term_results - forwarded to `buildGroupValues`
 * @param {*} term_results2 - forwarded to `buildGroupValues`
 * @returns {{ group1names: Array, group2names: Array, conf1_group1: *, conf1_group2: *,
 *   conf2_group1: *, conf2_group2: *, alerts: string[] }}
 * @throws {Error} when the sample list or the rnaseqGeneCount query is malformed
 */
function resolveSampleGroups(param, ds, term_results, term_results2) {
  const groupDefs = param.samplelst?.groups;
  if (groupDefs?.length !== 2) throw new Error(".samplelst.groups.length!=2");
  // A missing `values` must be rejected explicitly: `undefined < 1` is false,
  // so a plain `length < 1` comparison lets it through.
  if (!(groupDefs[0]?.values?.length >= 1)) throw new Error("samplelst.groups[0].values.length<1");
  if (!(groupDefs[1]?.values?.length >= 1)) throw new Error("samplelst.groups[1].values.length<1");

  const q = ds?.queries?.rnaseqGeneCount;
  if (!q) throw new Error("rnaseqGeneCount query missing on ds");
  if (!q.file) throw new Error("unknown data type for rnaseqGeneCount");
  if (!q.storage_type) throw new Error("storage_type is not defined");

  const resolveGroup = (groupDef) =>
    buildGroupValues(groupDef.values, q, ds, param.tw, param.tw2, term_results, term_results2);
  const g1 = resolveGroup(groupDefs[0]);
  const g2 = resolveGroup(groupDefs[1]);

  const alerts = [];
  if (g1.names.length < 1) alerts.push("sample size of group1 < 1");
  if (g2.names.length < 1) alerts.push("sample size of group2 < 1");

  // Set lookup keeps the overlap check linear in the number of samples.
  const group2Lookup = new Set(g2.names);
  const commonnames = g1.names.filter((name) => group2Lookup.has(name));
  if (commonnames.length) {
    alerts.push(`Common elements found between both groups: ${commonnames.join(", ")}`);
  }

  return {
    group1names: g1.names,
    group2names: g2.names,
    conf1_group1: g1.conf1,
    conf1_group2: g2.conf1,
    conf2_group1: g1.conf2,
    conf2_group2: g2.conf2,
    alerts
  };
}
166
/**
 * Wrap a base64 PNG string into an image descriptor.
 *
 * @param {string} b64 - base64 payload; a falsy value yields `null`
 * @param {string} key - identifier stored on the descriptor
 * @returns {{ src: string, size: number, key: string } | null} data-URL source,
 *   byte size derived from the string length minus trailing `=` padding, and key
 */
function deImageFromB64(b64, key) {
  if (!b64) {
    return null;
  }
  let padding = 0;
  if (b64.endsWith("==")) {
    padding = 2;
  } else if (b64.endsWith("=")) {
    padding = 1;
  }
  const size = Math.floor((b64.length * 3) / 4) - padding;
  const src = `data:image/png;base64,${b64}`;
  return { src, size, key };
}
172
- export {
173
- getDeCacheResult,
174
- init,
175
- resolveSampleGroups
176
- };
@@ -1,123 +0,0 @@
1
- import { getOrderedLabels } from "#src/termdb.barchart.js";
2
- import { getData } from "#src/termdb.matrix.js";
3
/**
 * Create the request handler for the term-categories route.
 *
 * The handler looks up the genome, dataset and termdb named by the query and
 * delegates to `trigger_getcategories`. Any failure is answered with
 * `{ error }`; Error instances are additionally logged.
 *
 * @param {{ genomes: object }} arg - genomes keyed by name, each with `datasets`
 * @returns {(req: object, res: object) => Promise<void>} route handler
 */
function init({ genomes }) {
  // Walk genome -> dataset -> termdb, throwing the route's message on a miss.
  const resolveTermdb = (query) => {
    const genome = genomes[query.genome];
    if (!genome) throw "invalid genome name";
    const ds = genome.datasets[query.dslabel];
    if (!ds) throw "invalid dataset name";
    const tdb = ds.cohort.termdb;
    if (!tdb) throw "invalid termdb object";
    return { ds, tdb };
  };

  return async (req, res) => {
    const q = req.query;
    try {
      const { ds, tdb } = resolveTermdb(req.query);
      await trigger_getcategories(q, res, tdb, ds);
    } catch (e) {
      res.send({ error: e?.message || e });
      const hasStack = e instanceof Error && e.stack;
      if (hasStack) console.log(e);
    }
  };
}
20
/**
 * Load the data of the requested term wrapper and send its categories.
 *
 * Side effect: assigns "_" to `q.tw.$id` when the term wrapper has no `$id`.
 *
 * @param {object} q - request query; reads `tw`, `filter`, `filter0`,
 *   `currentGeneNames`, `rglst`, `__protected__`, `__abortSignal`
 * @param {object} res - response; receives `{ lst, orderedLabels }`
 * @param {object} tdb - termdb object (not used by this function)
 * @param {object} ds - dataset passed to `getData` and `getCategories`
 * @returns {Promise<void>}
 * @throws rethrows `data.error` when `getData` reports one
 */
async function trigger_getcategories(q, res, tdb, ds) {
  const tw = q.tw;
  if (!tw.$id) tw.$id = "_";
  const $id = tw.$id;

  // currentGeneNames and rglst are optional, from mds3 mayAddGetCategoryArgs()
  const { filter, filter0, currentGeneNames, rglst, __protected__, __abortSignal } = q;
  const data = await getData(
    { filter, filter0, terms: [tw], currentGeneNames, rglst, __protected__, __abortSignal },
    ds
  );
  if (data.error) throw data.error;

  const [lst, orderedLabels] = getCategories(data, q, ds, $id);
  res.send({ lst, orderedLabels });
}
42
/**
 * Summarize the categories of one term wrapper across the samples in `data`.
 *
 * For a geneVariant term that is not using a predefined or custom groupset,
 * the result lists, per `dt`, how many samples carry each class (split by
 * origin for every dt whose `ds.assayAvailability.byDt` entry has `byOrigin`).
 * A sample counts once per dt/origin/class even when it has several matching
 * values. For every other term the result lists `{ samplecount, key, label }`
 * per distinct `key`.
 *
 * When `getOrderedLabels` returns labels, `lst` is sorted by label position;
 * entries whose label is not listed sort first.
 *
 * @param {object} data - `getData` output; reads `samples` and `refs.byTermId[$id]`
 * @param {object} q - request query; reads `q.tw.term` and `q.tw.q`
 * @param {object} ds - dataset; reads `assayAvailability.byDt`
 * @param {string} $id - key of the term wrapper inside each sample entry
 * @returns {[object[], Array]} `[lst, orderedLabels]`
 */
function getCategories(data, q, ds, $id) {
  const lst = [];
  const tw = q.tw;
  const termRefs = data.refs?.byTermId?.[$id];
  const isUngroupedGeneVariant =
    tw.term.type === "geneVariant" && tw.q.type !== "predefined-groupset" && tw.q.type !== "custom-groupset";

  if (isUngroupedGeneVariant) {
    const dtClassMap = new Map();
    if (ds.assayAvailability?.byDt) {
      for (const [dtType, dtValue] of Object.entries(ds.assayAvailability.byDt)) {
        if (dtValue.byOrigin) {
          dtClassMap.set(Number.parseInt(dtType, 10), { byOrigin: { germline: {}, somatic: {} } });
        }
      }
    }

    for (const sampleData of Object.values(data.samples)) {
      if (!Object.prototype.hasOwnProperty.call(sampleData, $id)) continue;
      // Tracks which dt/origin/class combinations this sample already counted for.
      const countedForSample = new Set();
      for (const value of sampleData[$id].values) {
        if (!dtClassMap.has(value.dt)) dtClassMap.set(value.dt, {});
        const dtClasses = dtClassMap.get(value.dt);
        const counts = dtClasses.byOrigin ? dtClasses.byOrigin[value.origin] : dtClasses;
        const tag = dtClasses.byOrigin
          ? `${value.dt} ${value.origin} ${value.class}`
          : `${value.dt} ${value.class}`;
        if (countedForSample.has(tag)) continue;
        countedForSample.add(tag);
        counts[value.class] = (counts[value.class] || 0) + 1;
      }
    }

    for (const [dt, classes] of dtClassMap) {
      lst.push({ dt, classes });
    }
  } else {
    const key2count = new Map();
    for (const sampleData of Object.values(data.samples || {})) {
      const v = sampleData[$id];
      if (!v) continue;
      if (!("key" in v)) continue;
      key2count.set(v.key, 1 + (key2count.get(v.key) || 0));
    }
    const events = termRefs?.events;
    for (const [key, count] of key2count) {
      lst.push({
        samplecount: count,
        key,
        // `find` returns undefined when no event matches this key, so the
        // label access must be optional before falling back to term values.
        label: events?.find((e) => e.event === key)?.label || tw.term?.values?.[key]?.label || key
      });
    }
  }

  const orderedLabels = getOrderedLabels(tw.term, termRefs?.bins || [], termRefs?.events, tw.q);
  if (orderedLabels.length) {
    // First position of each label; unlisted labels rank -1 like indexOf.
    const rank = new Map();
    orderedLabels.forEach((label, index) => {
      if (!rank.has(label)) rank.set(label, index);
    });
    const rankOf = (label) => (rank.has(label) ? rank.get(label) : -1);
    lst.sort((a, b) => rankOf(a.label) - rankOf(b.label));
  }
  return [lst, orderedLabels];
}
120
- export {
121
- getCategories,
122
- init
123
- };
@@ -1,203 +0,0 @@
1
- import { mayLog } from "#src/helpers.ts";
2
- import { formatElapsedTime } from "#shared";
3
- import { readJSONFile, parse_geneset_db, getChatRelatedPlotTypes } from "./chat/utils.ts";
4
- import { classifyQuery } from "./chat/classify1.ts";
5
- import { classifyPlotType } from "./chat/plot.ts";
6
- import { classifyNotPlot } from "./chat/classify2.ts";
7
- import { inferScaffold } from "./chat/scaffold.ts";
8
- import serverconfig from "../src/serverconfig.js";
9
- import { getDsAllowedTermTypes } from "./termdb.config.ts";
10
- import { phrase2entity } from "./chat/phrase2entity.ts";
11
- import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
12
- import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
13
- import { answerDataQueries } from "./chat/dataQueries.ts";
14
- import path from "path";
15
- import fs from "fs";
16
- import { resolveToPlotState } from "./chat/scaffold2state.ts";
17
/**
 * Create the request handler for the chat route.
 *
 * The handler validates genome/dataset, answers with a fixed text message when
 * the dataset has no `queries.chat`, lists the dataset's `.json` agent files,
 * validates `serverconfig.llm`, derives the cohort key from the request filter
 * and finally runs `run_chat_pipeline`, sending its result. Any failure is
 * answered with `{ error }`.
 *
 * @param {{ genomes: object }} arg - genomes keyed by name, each with `datasets`
 * @returns {(req: object, res: object) => Promise<*>} route handler
 */
function init({ genomes }) {
  return async (req, res) => {
    const q = req.query;
    try {
      const genome = genomes[q.genome];
      if (!genome) throw "invalid genome";
      const ds = genome.datasets?.[q.dslabel];
      if (!ds) throw "invalid dslabel";
      if (!ds.queries.chat) {
        return res.send({
          type: "text",
          text: "Only search functionality supported for this data. No chat functionality supported."
        });
      }

      const aiFilesDir = `${serverconfig.binpath}/../../dataset/ai/${q.dslabel}`;
      let agentFiles;
      try {
        // Asynchronous listing so the directory read does not block the event loop.
        const entries = await fs.promises.readdir(aiFilesDir);
        agentFiles = entries.filter((file) => file.endsWith(".json"));
      } catch (err) {
        if (err.code === "ENOENT") throw new Error(`Directory not found: ${aiFilesDir}`);
        if (err.code === "ENOTDIR") throw new Error(`Path is not a directory: ${aiFilesDir}`);
        throw err;
      }

      const llm = serverconfig.llm;
      if (!llm) throw "serverconfig.llm is not configured";
      const knownProviders = ["SJ", "ollama", "huggingface", "azure"];
      if (!knownProviders.includes(llm.provider)) {
        throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
      }

      // The filter may arrive as a JSON string or as an already parsed object.
      let rawFilter;
      if (typeof q.filter === "string") {
        try {
          rawFilter = JSON.parse(q.filter);
        } catch (e) {
          throw new Error("Failed to parse filter JSON string: " + e);
        }
      } else {
        rawFilter = q.filter;
      }
      const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
      const lst = Array.isArray(filter.lst) ? filter.lst : [];
      const cohortFilter = lst.find((item) => item.tag === "cohortFilter");
      const cohortKey = cohortFilter ? cohortFilter.tvs.values[0].key : "";

      const supportedPlotTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
      const chatSupportedPlotTypes = getChatRelatedPlotTypes(supportedPlotTypes);
      const genedb = serverconfig.tpmasterdir + "/" + genome.genedb.dbfile;
      const allowedTermTypes = getDsAllowedTermTypes(ds);
      const ai_output_json = await run_chat_pipeline(
        q.prompt,
        llm,
        ds,
        genedb,
        agentFiles,
        aiFilesDir,
        chatSupportedPlotTypes,
        allowedTermTypes,
        genome
      );
      mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
      res.send(ai_output_json);
    } catch (e) {
      // `e` may be a thrown string or even nullish, so guard the stack access.
      if (e?.stack) mayLog(e.stack);
      res.send({ error: e?.message || e });
    }
  };
}
83
/**
 * Turn a user prompt into a chat response.
 *
 * The prompt is first classified. "notplot" prompts are answered by
 * `classifyNotPlot` (only its "html" results are passed through),
 * "binaryQuery" prompts by `answerDataQueries`, and "plot" prompts go through
 * five phases: scaffold -> entities -> term objects -> tw/tvs -> plot state.
 * Any phase may short-circuit by returning a `{ type: "text" }` result, which
 * is returned unchanged.
 *
 * @param {string} userPrompt - the user's question
 * @param {object} llm - llm configuration forwarded to every agent
 * @param {object} ds - dataset; reads `label` and `cohort.db.file`
 * @param {string} genedb - path handed to `parse_geneset_db`
 * @param {string[]} agentFiles - agent file names forwarded to `classifyNotPlot`
 * @param {string} aiFilesDir - directory that must contain `main.json`
 * @param {string[]} [supportedPlotTypes] - plot types the chat may produce;
 *   a missing list means no plot type is supported
 * @param {*} allowedTermTypes - forwarded to the agents
 * @param {object} genome - forwarded to the agents
 * @returns {Promise<object|undefined>} response object; `undefined` when the
 *   classification type is none of "notplot", "binaryQuery", "plot"
 * @throws {string} when `main.json` is missing, the binary-query agent returns
 *   nothing, or the scaffold agent returns an empty result
 */
async function run_chat_pipeline(userPrompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedPlotTypes, allowedTermTypes, genome) {
  const mainFile = path.join(aiFilesDir, "main.json");
  if (!fs.existsSync(mainFile)) throw "Main data file is not specified for dataset:" + ds.label;
  const dataset_json = await readJSONFile(mainFile);

  let time = Date.now();
  const class_response = await classifyQuery(userPrompt, llm);
  mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time));

  if (class_response.type === "notplot") {
    time = Date.now();
    const notPlotResult = await classifyNotPlot(userPrompt, llm, agentFiles, aiFilesDir);
    mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time));
    if (notPlotResult.type === "html") return notPlotResult;
    return {
      type: "text",
      text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
    };
  }

  if (class_response.type === "binaryQuery") {
    const answer = await answerDataQueries(userPrompt, llm, allowedTermTypes);
    if (!answer) throw "Couldn't decide if this is data related query!";
    mayLog("Data Binary Query: ", answer);
    return answer;
  }

  if (class_response.type !== "plot") return undefined;

  time = Date.now();
  const plotType = await classifyPlotType(userPrompt, llm);
  mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));
  // A missing list of supported types is treated as "nothing supported".
  if (!supportedPlotTypes?.includes(plotType)) {
    const log = 'Plot type: "' + plotType + '" is not supported.';
    mayLog(log);
    return { type: "text", text: log };
  }
  const genes_list = await parse_geneset_db(genedb);

  mayLog("#################################################");
  mayLog("####### First phase: Infer Plot Scaffolds #######");
  mayLog("#################################################");
  time = Date.now();
  const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
  const scaffoldResult = await inferScaffold(
    userPrompt,
    plotType,
    llm,
    genome,
    genes_list,
    allowedTermTypes,
    dataset_json,
    ds,
    dataset_db
  );
  mayLog("ScaffoldResult: ", scaffoldResult);
  mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
  // Must precede any `in` test: `"x" in undefined` throws a TypeError and
  // would mask this message.
  if (!scaffoldResult)
    throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
  const isFinishedHiercluster =
    plotType === "hiercluster" && "plot" in scaffoldResult && scaffoldResult.type === "plot";
  if (isFinishedHiercluster || scaffoldResult.type === "text") {
    return scaffoldResult;
  }
  const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;

  mayLog("#################################################");
  mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
  mayLog("#################################################");
  time = Date.now();
  const phrase2entityResult = await phrase2entity(
    scaffoldResult,
    plotType,
    llm,
    genes_list,
    dataset_json,
    ds,
    genome
  );
  mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
  if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
    return phrase2entityResult;
  }
  mayLog(phrase2entityResult);

  mayLog("#################################################");
  mayLog("####### Third phase: From Entities infer Term Objects #######");
  mayLog("#################################################");
  time = Date.now();
  const termObj = await inferTermObjFromEntity(
    phrase2entityResult,
    plotType,
    llm,
    dataset_db,
    genes_list,
    genome
  );
  mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
  mayLog("Inferred termObj from entity:", JSON.stringify(termObj));

  mayLog("#################################################");
  mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
  mayLog("#################################################");
  time = Date.now();
  const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db, genome);
  mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
  if ("type" in twTvsObj && twTvsObj.type === "text") {
    return twTvsObj;
  }
  mayLog("twTvsObj:", twTvsObj);

  mayLog("#################################################");
  mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
  mayLog("#################################################");
  time = Date.now();
  const ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
  mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
  return ai_output_json;
}
200
- export {
201
- init,
202
- run_chat_pipeline
203
- };