@sjcrh/proteinpaint-server 2.184.0 → 2.185.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,11 @@
1
- import fs from "fs";
2
1
  import path from "path";
3
2
  import { diffExpPayload } from "#types/checkers";
4
- import { run_rust } from "@sjcrh/proteinpaint-rust";
5
- import { getData } from "../src/termdb.matrix.js";
6
- import { get_ds_tdb } from "../src/termdb.js";
7
- import { run_R } from "@sjcrh/proteinpaint-r";
8
3
  import { mayLog } from "#src/helpers.ts";
9
4
  import serverconfig from "../src/serverconfig.js";
10
- import { imageSize } from "image-size";
11
5
  import { get_header_txt } from "#src/utils.js";
12
- import { formatElapsedTime } from "#shared";
6
+ import { run_rust } from "@sjcrh/proteinpaint-rust";
7
+ import { renderVolcano } from "../src/renderVolcano.ts";
8
+ import { readCacheFileOrRecompute, resolveDeContext, resolveSampleGroups } from "../src/diffAnalysis.ts";
13
9
  const api = {
14
10
  endpoint: "termdb/DE",
15
11
  methods: {
@@ -27,244 +23,41 @@ function init({ genomes }) {
27
23
  return async (req, res) => {
28
24
  try {
29
25
  const q = req.query;
30
- const genome = genomes[q.genome];
31
- if (!genome) throw new Error("invalid genome");
32
- const [ds] = get_ds_tdb(genome, q);
33
- let term_results = [];
34
- if (q.tw) {
35
- const terms = [q.tw];
36
- term_results = await getData(
37
- {
38
- filter: q.filter,
39
- filter0: q.filter0,
40
- terms
41
- },
42
- ds
43
- );
44
- if (term_results.error) throw new Error(term_results.error);
45
- }
46
- let term_results2 = [];
47
- if (q.tw2) {
48
- const terms2 = [q.tw2];
49
- term_results2 = await getData(
50
- {
51
- filter: q.filter,
52
- filter0: q.filter0,
53
- terms: terms2
54
- },
55
- ds
56
- );
57
- if (term_results2.error) throw new Error(term_results2.error);
26
+ if (q.preAnalysis) {
27
+ const { ds, term_results, term_results2 } = await resolveDeContext(q, genomes);
28
+ const groups = resolveSampleGroups(q, ds, term_results, term_results2);
29
+ const group1Name = q.samplelst.groups[0].name;
30
+ const group2Name = q.samplelst.groups[1].name;
31
+ res.send({
32
+ data: {
33
+ [group1Name]: groups.group1names.length,
34
+ [group2Name]: groups.group2names.length,
35
+ ...groups.alerts.length ? { alert: groups.alerts.join(" | ") } : {}
36
+ }
37
+ });
38
+ return;
58
39
  }
59
- const results = await run_DE(req.query, ds, term_results, term_results2);
60
- if (!results || !results.data) throw new Error("No data available");
61
- res.send(results);
40
+ const { cacheId, geneData, sample_size1, sample_size2, method, images, bcv } = await readCacheFileOrRecompute({
41
+ daRequest: q,
42
+ genomes
43
+ });
44
+ const rendered = await renderVolcano(geneData, q.volcanoRender);
45
+ rendered.cacheId = cacheId;
46
+ const output = {
47
+ data: rendered,
48
+ sample_size1,
49
+ sample_size2,
50
+ method,
51
+ images
52
+ };
53
+ if (bcv != null) output.bcv = bcv;
54
+ res.send(output);
62
55
  } catch (e) {
63
56
  res.send({ status: "error", error: e.message || e });
64
57
  if (e instanceof Error && e.stack) console.log(e);
65
58
  }
66
59
  };
67
60
  }
68
- async function run_DE(param, ds, term_results, term_results2) {
69
- if (param.samplelst?.groups?.length != 2) throw new Error(".samplelst.groups.length!=2");
70
- if (param.samplelst.groups[0].values?.length < 1) throw new Error("samplelst.groups[0].values.length<1");
71
- if (param.samplelst.groups[1].values?.length < 1) throw new Error("samplelst.groups[1].values.length<1");
72
- const q = ds.queries.rnaseqGeneCount;
73
- if (!q) return;
74
- if (!q.file) throw new Error("unknown data type for rnaseqGeneCount");
75
- if (!q.storage_type) throw new Error("storage_type is not defined");
76
- param.storage_type = q.storage_type;
77
- const group1names = [];
78
- const conf1_group1 = [];
79
- const conf2_group1 = [];
80
- for (const s of param.samplelst.groups[0].values) {
81
- if (!Number.isInteger(s.sampleId)) continue;
82
- const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
83
- if (!n) continue;
84
- if (q.allSampleSet.has(n)) {
85
- if (param.tw && !param.tw2) {
86
- if (term_results.samples[s.sampleId]) {
87
- if (param.tw.q.mode == "continuous") {
88
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
89
- } else {
90
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
91
- }
92
- group1names.push(n);
93
- }
94
- } else if (!param.tw && param.tw2) {
95
- if (term_results2.samples[s.sampleId]) {
96
- if (param.tw2.q.mode == "continuous") {
97
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
98
- } else {
99
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
100
- }
101
- group1names.push(n);
102
- }
103
- } else if (param.tw && param.tw2) {
104
- if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
105
- if (param.tw.q.mode == "continuous") {
106
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
107
- } else {
108
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
109
- }
110
- if (param.tw2.q.mode == "continuous") {
111
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
112
- } else {
113
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
114
- }
115
- group1names.push(n);
116
- }
117
- } else {
118
- group1names.push(n);
119
- }
120
- }
121
- }
122
- const group2names = [];
123
- const conf1_group2 = [];
124
- const conf2_group2 = [];
125
- for (const s of param.samplelst.groups[1].values) {
126
- if (!Number.isInteger(s.sampleId)) continue;
127
- const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
128
- if (!n) continue;
129
- if (q.allSampleSet.has(n)) {
130
- if (param.tw && !param.tw2) {
131
- if (term_results.samples[s.sampleId]) {
132
- if (param.tw.q.mode == "continuous") {
133
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
134
- } else {
135
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
136
- }
137
- group2names.push(n);
138
- }
139
- } else if (!param.tw && param.tw2) {
140
- if (term_results2.samples[s.sampleId]) {
141
- if (param.tw2.q.mode == "continuous") {
142
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
143
- } else {
144
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
145
- }
146
- group2names.push(n);
147
- }
148
- } else if (param.tw && param.tw2) {
149
- if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
150
- if (param.tw.q.mode == "continuous") {
151
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
152
- } else {
153
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
154
- }
155
- if (param.tw2.q.mode == "continuous") {
156
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
157
- } else {
158
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
159
- }
160
- group2names.push(n);
161
- }
162
- } else {
163
- group2names.push(n);
164
- }
165
- }
166
- }
167
- const sample_size1 = group1names.length;
168
- const sample_size2 = group2names.length;
169
- const alerts = validateGroups(sample_size1, sample_size2, group1names, group2names);
170
- if (param.preAnalysis) {
171
- const group1Name = param.samplelst.groups[0].name;
172
- const group2Name = param.samplelst.groups[1].name;
173
- return {
174
- data: {
175
- [group1Name]: sample_size1,
176
- [group2Name]: sample_size2,
177
- ...alerts.length ? { alert: alerts.join(" | ") } : {}
178
- }
179
- };
180
- }
181
- if (alerts.length) throw new Error(alerts.join(" | "));
182
- const cases_string = group2names.map((i) => i).join(",");
183
- const controls_string = group1names.map((i) => i).join(",");
184
- const expression_input = {
185
- case: cases_string,
186
- control: controls_string,
187
- data_type: "do_DE",
188
- input_file: q.file,
189
- cachedir: serverconfig.cachedir,
190
- min_count: param.min_count,
191
- min_total_count: param.min_total_count,
192
- cpm_cutoff: param.cpm_cutoff,
193
- storage_type: param.storage_type,
194
- DE_method: param.method,
195
- mds_cutoff: 1e4
196
- // If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive. Number of genes * Number of samples < mds_cutoff for mds generation
197
- };
198
- if (param.tw) {
199
- expression_input.conf1 = [...conf1_group2, ...conf1_group1];
200
- expression_input.conf1_mode = param.tw.q.mode;
201
- if (new Set(expression_input.conf1).size === 1) {
202
- throw new Error("Confounding variable 1 has only one value");
203
- }
204
- }
205
- if (param.tw2) {
206
- expression_input.conf2 = [...conf2_group2, ...conf2_group1];
207
- expression_input.conf2_mode = param.tw2.q.mode;
208
- if (new Set(expression_input.conf2).size === 1) {
209
- throw new Error("Confounding variable 2 has only one value");
210
- }
211
- }
212
- const sample_size_limit = 8;
213
- if (group1names.length <= sample_size_limit && group2names.length <= sample_size_limit || param.method == "edgeR" || param.method == "limma") {
214
- const time12 = (/* @__PURE__ */ new Date()).valueOf();
215
- const result2 = JSON.parse(await run_R("edge_newh5.R", JSON.stringify(expression_input)));
216
- mayLog("Time taken to run edgeR:", formatElapsedTime(Date.now() - time12));
217
- param.method = "edgeR";
218
- const ql_imagePath = path.join(serverconfig.cachedir, result2.edgeR_ql_image_name[0]);
219
- mayLog("ql_imagePath:", ql_imagePath);
220
- await readFileAndDelete(ql_imagePath, "ql_image", result2);
221
- if (result2.edgeR_mds_image_name) {
222
- const mds_imagePath = path.join(serverconfig.cachedir, result2.edgeR_mds_image_name[0]);
223
- mayLog("mds_imagePath:", mds_imagePath);
224
- await readFileAndDelete(mds_imagePath, "mds_image", result2);
225
- }
226
- const images = [result2.ql_image];
227
- if (result2.mds_image) images.push(result2.mds_image);
228
- const output = {
229
- data: result2.gene_data,
230
- sample_size2: result2.num_cases[0],
231
- sample_size1: result2.num_controls[0],
232
- method: param.method,
233
- images
234
- };
235
- if (result2.bcv && result2.bcv[0] !== null && result2.bcv[0] !== void 0) {
236
- output.bcv = result2.bcv[0];
237
- }
238
- return output;
239
- }
240
- const time1 = (/* @__PURE__ */ new Date()).valueOf();
241
- const result = JSON.parse(await run_rust("DEanalysis", JSON.stringify(expression_input)));
242
- mayLog("Time taken to run rust DE pipeline:", formatElapsedTime(Date.now() - time1));
243
- param.method = "wilcoxon";
244
- return { data: result, sample_size1, sample_size2, method: param.method };
245
- }
246
- function validateGroups(sample_size1, sample_size2, group1names, group2names) {
247
- const alerts = [];
248
- if (sample_size1 < 1) alerts.push("sample size of group1 < 1");
249
- if (sample_size2 < 1) alerts.push("sample size of group2 < 1");
250
- const commonnames = group1names.filter((x) => group2names.includes(x));
251
- if (commonnames.length) alerts.push(`Common elements found between both groups: ${commonnames.join(", ")}`);
252
- return alerts;
253
- }
254
- async function readFileAndDelete(file, key, response) {
255
- const plot = await fs.promises.readFile(file);
256
- const plotBuffer = Buffer.from(plot).toString("base64");
257
- const { width, height } = imageSize(file);
258
- const obj = {
259
- src: `data:image/png;base64,${plotBuffer}`,
260
- size: `${width}x${height}`,
261
- key
262
- };
263
- response[key] = obj;
264
- fs.unlink(file, (err) => {
265
- if (err) throw new Error(err.message || String(err));
266
- });
267
- }
268
61
  async function validate_query_rnaseqGeneCount(ds) {
269
62
  const q = ds.queries.rnaseqGeneCount;
270
63
  if (!q) return;
@@ -0,0 +1,191 @@
1
+ import { ChatPayload } from "#types/checkers";
2
+ import { mayLog } from "#src/helpers.ts";
3
+ import { formatElapsedTime } from "#shared";
4
+ import { readJSONFile, parse_geneset_db } from "./chat/utils.ts";
5
+ import { classifyQuery } from "./chat/classify1.ts";
6
+ import { classifyPlotType } from "./chat/plot.ts";
7
+ import { classifyNotPlot } from "./chat/classify2.ts";
8
+ import { inferScaffold } from "./chat/scaffold.ts";
9
+ import serverconfig from "../src/serverconfig.js";
10
+ import { getDsAllowedTermTypes } from "./termdb.config.ts";
11
+ import { phrase2entity } from "./chat/phrase2entity.ts";
12
+ import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
13
+ import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
14
+ import path from "path";
15
+ import fs from "fs";
16
+ import { resolveToPlotState } from "./chat/scaffold2state.ts";
17
+ const api = {
18
+ endpoint: "termdb/chat3",
19
+ methods: {
20
+ get: {
21
+ ...ChatPayload,
22
+ init
23
+ },
24
+ post: {
25
+ ...ChatPayload,
26
+ init
27
+ }
28
+ }
29
+ };
30
+ function init({ genomes }) {
31
+ return async (req, res) => {
32
+ const q = req.query;
33
+ try {
34
+ const g = genomes[q.genome];
35
+ if (!g) throw "invalid genome";
36
+ const ds = g.datasets?.[q.dslabel];
37
+ if (!ds) throw "invalid dslabel";
38
+ const aiFilesDir = serverconfig.binpath + "/../../dataset/ai/" + q.dslabel;
39
+ let agentFiles = [];
40
+ try {
41
+ agentFiles = await fs.readdirSync(aiFilesDir).filter((file) => file.endsWith(".json"));
42
+ } catch (err) {
43
+ if (err.code === "ENOENT") throw new Error(`Directory not found: ${aiFilesDir}`);
44
+ if (err.code === "ENOTDIR") throw new Error(`Path is not a directory: ${aiFilesDir}`);
45
+ throw err;
46
+ }
47
+ const llm = serverconfig.llm;
48
+ if (!llm) throw "serverconfig.llm is not configured";
49
+ if (llm.provider !== "SJ" && llm.provider !== "ollama" && llm.provider !== "huggingface" && llm.provider !== "azure") {
50
+ throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
51
+ }
52
+ const rawFilter = typeof q.filter === "string" ? JSON.parse(q.filter) : q.filter;
53
+ const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
54
+ const lst = Array.isArray(filter.lst) ? filter.lst : [];
55
+ const cohortFilter = lst.find((item) => item.tag === "cohortFilter");
56
+ const cohortKey = cohortFilter ? cohortFilter.tvs.values[0].key : "";
57
+ const supportedChartTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
58
+ const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
59
+ const _allowedTermTypes = getDsAllowedTermTypes(ds);
60
+ const ai_output_json = await run_chat_pipeline(
61
+ q.prompt,
62
+ llm,
63
+ ds,
64
+ genedb,
65
+ agentFiles,
66
+ aiFilesDir,
67
+ supportedChartTypes,
68
+ _allowedTermTypes
69
+ // testing
70
+ );
71
+ mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
72
+ res.send(ai_output_json);
73
+ } catch (e) {
74
+ if (e.stack) mayLog(e.stack);
75
+ res.send({ error: e?.message || e });
76
+ }
77
+ };
78
+ }
79
+ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedChartTypes, _allowedTermTypes) {
80
+ if (!fs.existsSync(path.join(aiFilesDir, "main.json")))
81
+ throw "Main data file is not specified for dataset:" + ds.label;
82
+ const dataset_json = await readJSONFile(path.join(aiFilesDir, "main.json"));
83
+ const time1 = (/* @__PURE__ */ new Date()).valueOf();
84
+ const class_response = await classifyQuery(user_prompt, llm);
85
+ mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
86
+ let ai_output_json;
87
+ if (class_response.type == "notplot") {
88
+ const time2 = (/* @__PURE__ */ new Date()).valueOf();
89
+ const notPlotResult = await classifyNotPlot(user_prompt, llm, agentFiles, aiFilesDir);
90
+ mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
91
+ if (notPlotResult.type == "html") {
92
+ ai_output_json = notPlotResult;
93
+ } else {
94
+ ai_output_json = {
95
+ type: "text",
96
+ text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
97
+ };
98
+ }
99
+ } else if (class_response.type == "plot") {
100
+ let time = (/* @__PURE__ */ new Date()).valueOf();
101
+ const plotType = await classifyPlotType(user_prompt, llm);
102
+ mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));
103
+ if (!supportedChartTypes) {
104
+ const errorMsg = "Supported chart types list is undefined. Please check the dataset configuration and ensure that getSupportedChartTypes is implemented correctly. Skipping chart type validation, but this may lead to unsupported chart type errors downstream.";
105
+ console.warn(errorMsg);
106
+ const errorResponse = {
107
+ type: "text",
108
+ text: errorMsg
109
+ };
110
+ return errorResponse;
111
+ }
112
+ if (plotType === "summary") {
113
+ if (!supportedChartTypes.includes("dictionary")) {
114
+ const log = 'Plot type: "' + plotType + '" is not supported.';
115
+ ai_output_json = {
116
+ type: "text",
117
+ text: log
118
+ };
119
+ mayLog(log);
120
+ return ai_output_json;
121
+ }
122
+ } else if (plotType === "dge") {
123
+ if (!supportedChartTypes.includes("DA")) {
124
+ const log = 'Plot type: "' + plotType + '" is not supported.';
125
+ ai_output_json = {
126
+ type: "text",
127
+ text: log
128
+ };
129
+ mayLog(log);
130
+ return ai_output_json;
131
+ }
132
+ } else {
133
+ mayLog(`Supported chart types for this cohort: ${supportedChartTypes}`);
134
+ if (!supportedChartTypes.includes(plotType)) {
135
+ const log = 'Plot type: "' + plotType + '" is not supported.';
136
+ ai_output_json = {
137
+ type: "text",
138
+ text: log
139
+ };
140
+ mayLog(log);
141
+ return ai_output_json;
142
+ }
143
+ }
144
+ mayLog("####### First phase: Infer Plot Scaffolds #######");
145
+ time = (/* @__PURE__ */ new Date()).valueOf();
146
+ const scaffoldResult = await inferScaffold(user_prompt, plotType, llm);
147
+ mayLog("ScaffoldResult: ", scaffoldResult);
148
+ mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
149
+ if (!scaffoldResult)
150
+ throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
151
+ const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;
152
+ mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
153
+ const genes_list = await parse_geneset_db(genedb);
154
+ time = (/* @__PURE__ */ new Date()).valueOf();
155
+ const phrase2entityResult = await phrase2entity(scaffoldResult, plotType, llm, genes_list, dataset_json, ds);
156
+ mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
157
+ if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
158
+ return phrase2entityResult;
159
+ }
160
+ mayLog(phrase2entityResult);
161
+ mayLog("####### Third phase: From Entities infer Term Objects #######");
162
+ const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
163
+ time = (/* @__PURE__ */ new Date()).valueOf();
164
+ const termObj = await inferTermObjFromEntity(
165
+ phrase2entityResult,
166
+ plotType,
167
+ llm,
168
+ dataset_db,
169
+ genes_list
170
+ );
171
+ mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
172
+ mayLog("Inferred termObj from entity:", JSON.stringify(termObj));
173
+ mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
174
+ time = (/* @__PURE__ */ new Date()).valueOf();
175
+ const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db);
176
+ mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
177
+ if ("type" in twTvsObj && twTvsObj.type === "text") {
178
+ return twTvsObj;
179
+ }
180
+ mayLog("twTvsObj:", twTvsObj);
181
+ mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
182
+ time = (/* @__PURE__ */ new Date()).valueOf();
183
+ ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
184
+ mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
185
+ }
186
+ return ai_output_json;
187
+ }
188
+ export {
189
+ api,
190
+ run_chat_pipeline
191
+ };
@@ -7,8 +7,15 @@ import serverconfig from "#src/serverconfig.js";
7
7
  import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
8
8
  import { mayLimitSamples } from "#src/mds3.filter.js";
9
9
  import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
10
- import { TermTypes, ISOFORM_EXPRESSION, PROTEOME_ABUNDANCE } from "#shared/terms.js";
11
- import { termType2label } from "#shared/terms.js";
10
+ import { getData } from "#src/termdb.matrix.js";
11
+ import {
12
+ GENE_EXPRESSION,
13
+ METABOLITE_INTENSITY,
14
+ NUMERIC_DICTIONARY_TERM,
15
+ termType2label,
16
+ ISOFORM_EXPRESSION,
17
+ PROTEOME_ABUNDANCE
18
+ } from "#shared/terms.js";
12
19
  import { formatElapsedTime } from "#shared/time.js";
13
20
  const api = {
14
21
  endpoint: "termdb/cluster",
@@ -34,8 +41,9 @@ function init({ genomes }) {
34
41
  if (!ds) throw "invalid dataset name";
35
42
  if (ds.label === "GDC" && !ds.__gdc?.doneCaching)
36
43
  throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
37
- if ([TermTypes.GENE_EXPRESSION, ISOFORM_EXPRESSION, TermTypes.METABOLITE_INTENSITY].includes(q.dataType)) {
38
- if (!ds.queries?.[q.dataType]) throw `no ${q.dataType} data on this dataset`;
44
+ if ([GENE_EXPRESSION, ISOFORM_EXPRESSION, METABOLITE_INTENSITY, NUMERIC_DICTIONARY_TERM].includes(q.dataType)) {
45
+ if (!ds.queries?.[q.dataType] && q.dataType !== NUMERIC_DICTIONARY_TERM)
46
+ throw `no ${q.dataType} data on this dataset`;
39
47
  if (!q.terms) throw `missing gene list`;
40
48
  if (!Array.isArray(q.terms)) throw `gene list is not an array`;
41
49
  if (q.terms.length < 3)
@@ -43,7 +51,7 @@ function init({ genomes }) {
43
51
  result = await getResult(q, ds);
44
52
  } else if (PROTEOME_ABUNDANCE == q.dataType) {
45
53
  const proteomeQuery = ds.queries?.proteome;
46
- if (!proteomeQuery?.get) throw `no ${TermTypes.PROTEOME_ABUNDANCE} data getter on this dataset`;
54
+ if (!proteomeQuery?.get) throw `no ${PROTEOME_ABUNDANCE} data getter on this dataset`;
47
55
  if (!q.terms) throw `missing gene list`;
48
56
  if (!Array.isArray(q.terms)) throw `gene list is not an array`;
49
57
  if (q.terms.length < 3)
@@ -64,13 +72,16 @@ function init({ genomes }) {
64
72
  }
65
73
  async function getResult(q, ds) {
66
74
  let _q = q;
67
- if (q.dataType == TermTypes.GENE_EXPRESSION) {
75
+ if (q.dataType == GENE_EXPRESSION) {
68
76
  _q = JSON.parse(JSON.stringify(q));
69
77
  _q.forClusteringAnalysis = true;
70
78
  _q.__abortSignal = q.__abortSignal;
71
79
  }
72
80
  let term2sample2value, byTermId, bySampleId, skippedSexChrGenes;
73
- if (q.dataType == PROTEOME_ABUNDANCE) {
81
+ if (q.dataType == NUMERIC_DICTIONARY_TERM) {
82
+ ;
83
+ ({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
84
+ } else if (q.dataType == PROTEOME_ABUNDANCE) {
74
85
  ;
75
86
  ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get(_q));
76
87
  } else {
@@ -90,7 +101,7 @@ async function getResult(q, ds) {
90
101
  const removedHierClusterTerms = [];
91
102
  if (noValueTerms.length) {
92
103
  removedHierClusterTerms.push({
93
- text: `Skipped ${q.dataType == TermTypes.GENE_EXPRESSION ? "genes" : "items"} with no data`,
104
+ text: `Skipped ${q.dataType == GENE_EXPRESSION ? "genes" : "items"} with no data`,
94
105
  lst: noValueTerms
95
106
  });
96
107
  }
@@ -100,7 +111,7 @@ async function getResult(q, ds) {
100
111
  if (term2sample2value.size == 0) throw "no data";
101
112
  if (term2sample2value.size == 1) {
102
113
  const g = Array.from(term2sample2value.keys())[0];
103
- return { term: { gene: g, type: TermTypes.GENE_EXPRESSION }, data: term2sample2value.get(g) };
114
+ return { term: { gene: g, type: GENE_EXPRESSION }, data: term2sample2value.get(g) };
104
115
  }
105
116
  const t = Date.now();
106
117
  const clustering = await doClustering(term2sample2value, q, Object.keys(bySampleId).length);
@@ -109,6 +120,29 @@ async function getResult(q, ds) {
109
120
  if (removedHierClusterTerms.length) result.removedHierClusterTerms = removedHierClusterTerms;
110
121
  return result;
111
122
  }
123
+ async function getNumericDictTermAnnotation(q, ds) {
124
+ const getDataArgs = {
125
+ // TODO: figure out when term is not a termwrapper
126
+ terms: q.terms.map((tw) => tw.term ? tw : { term: tw, q: { mode: "continuous" } }),
127
+ filter: q.filter,
128
+ filter0: q.filter0,
129
+ __protected__: q.__protected__
130
+ };
131
+ const data = await getData(getDataArgs, ds);
132
+ if (data.error) throw data.error;
133
+ const term2sample2value = /* @__PURE__ */ new Map();
134
+ for (const [key, sampleData] of Object.entries(data.samples)) {
135
+ for (const [term, value] of Object.entries(sampleData)) {
136
+ if (term !== "sample") {
137
+ if (!term2sample2value.has(term)) {
138
+ term2sample2value.set(term, {});
139
+ }
140
+ term2sample2value.get(term)[key] = value.value;
141
+ }
142
+ }
143
+ }
144
+ return { term2sample2value, byTermId: data.refs.byTermId, bySampleId: data.refs.bySampleId };
145
+ }
112
146
  async function doClustering(data, q, numCases = 1e3) {
113
147
  const sampleSet = /* @__PURE__ */ new Set();
114
148
  let firstTerm = true;
@@ -297,6 +331,7 @@ async function validateNative(q, ds) {
297
331
  const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
298
332
  if (!sampleId) continue;
299
333
  if (limitSamples && !limitSamples.has(sampleId)) continue;
334
+ if (!Number.isFinite(samplesData[sampleName])) continue;
300
335
  s2v[sampleId] = samplesData[sampleName];
301
336
  }
302
337
  if (Object.keys(s2v).length) {
@@ -47,11 +47,12 @@ function make(q, req, res, ds, genome) {
47
47
  selectCohort: getSelectCohort(ds, req),
48
48
  supportedChartTypes: tdb.q?.getSupportedChartTypes(req),
49
49
  renamedChartTypes: ds.cohort.renamedChartTypes,
50
- allowedTermTypes: getAllowedTermTypes(ds),
50
+ allowedTermTypes: getDsAllowedTermTypes(ds),
51
51
  massSessionDuration: serverconfig.features.massSessionDuration || 30,
52
52
  dataDownloadCatch: tdb.dataDownloadCatch,
53
53
  matrix: tdb.matrix,
54
54
  hierCluster: tdb.hierCluster,
55
+ numericDictTermCluster: tdb.numericDictTermCluster,
55
56
  mclass: tdb.mclass,
56
57
  alwaysRefillCategoricalTermValues: tdb.alwaysRefillCategoricalTermValues,
57
58
  isGeneSetTermdb: tdb.isGeneSetTermdb,
@@ -293,7 +294,7 @@ function addNonDictionaryQueries(c, ds, genome) {
293
294
  q2.images = {};
294
295
  }
295
296
  }
296
- function getAllowedTermTypes(ds) {
297
+ function getDsAllowedTermTypes(ds) {
297
298
  const typeSet = /* @__PURE__ */ new Set();
298
299
  for (const r of ds.cohort.termdb.termtypeByCohort) {
299
300
  if (r.termType) typeSet.add(r.termType);
@@ -329,5 +330,6 @@ function getSelectCohort(ds, req) {
329
330
  return copy;
330
331
  }
331
332
  export {
332
- api
333
+ api,
334
+ getDsAllowedTermTypes
333
335
  };
@@ -3,6 +3,7 @@ import { getData } from "../src/termdb.matrix.js";
3
3
  import { run_R } from "@sjcrh/proteinpaint-r";
4
4
  import { mayLog } from "#src/helpers.ts";
5
5
  import { formatElapsedTime } from "#shared";
6
+ import { renderVolcano } from "../src/renderVolcano.ts";
6
7
  const api = {
7
8
  endpoint: "termdb/diffMeth",
8
9
  methods: {
@@ -39,7 +40,7 @@ function init({ genomes }) {
39
40
  throw new Error(
40
41
  "Differential methylation analysis returned no data. Please verify sample selections and try again."
41
42
  );
42
- if (Array.isArray(results.data) && !results.data.length)
43
+ if ("totalRows" in results.data && results.data.totalRows === 0)
43
44
  throw new Error("No promoters passed filtering. Try relaxing group criteria or selecting more samples.");
44
45
  res.send(results);
45
46
  } catch (e) {
@@ -168,8 +169,9 @@ async function run_diffMeth(param, ds, term_results, term_results2) {
168
169
  const time1 = Date.now();
169
170
  const result = JSON.parse(await run_R("diffMeth.R", JSON.stringify(diffMethInput)));
170
171
  mayLog("Time taken to run diffMeth:", formatElapsedTime(Date.now() - time1));
172
+ const rendered = await renderVolcano(result.promoter_data, param.volcanoRender);
171
173
  const output = {
172
- data: result.promoter_data,
174
+ data: rendered,
173
175
  sample_size1,
174
176
  sample_size2
175
177
  };