@sjcrh/proteinpaint-server 2.177.1-0 → 2.178.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dataset/termdb.test.js +20 -3
- package/package.json +6 -6
- package/routes/grin2.js +25 -6
- package/routes/termdb.chat.js +13 -8
- package/routes/termdb.chat2.js +153 -0
- package/routes/termdb.cluster.js +10 -0
- package/routes/termdb.config.js +15 -4
- package/routes/termdb.diffMeth.js +180 -0
- package/routes/termdb.dmr.js +48 -0
- package/routes/termdb.sampleScatter.js +12 -2
- package/routes/termdb.singlecellSamples.js +40 -4
- package/src/app.js +2742 -707
- package/src/serverconfig.js +1 -1
package/dataset/termdb.test.js
CHANGED
|
@@ -210,18 +210,31 @@ function termdb_test_default() {
|
|
|
210
210
|
defaultTw4correlationPlot: {
|
|
211
211
|
disease: { id: "diaggrp", q: {} }
|
|
212
212
|
},
|
|
213
|
-
|
|
213
|
+
termCollections: [
|
|
214
214
|
{
|
|
215
215
|
name: "Fake Collection 1",
|
|
216
|
+
type: "numeric",
|
|
216
217
|
termIds: ["agedx", "a_death", "a_ndi", "agelastvisit"],
|
|
217
218
|
branchIds: ["Demographic Variables", "Age (years)"],
|
|
218
219
|
propsByTermId: {}
|
|
219
220
|
},
|
|
220
221
|
{
|
|
221
222
|
name: "Fake Collection 2",
|
|
223
|
+
type: "numeric",
|
|
222
224
|
termIds: ["a_death", "a_ndi", "agelastvisit"],
|
|
223
225
|
branchIds: ["Demographic Variables", "Age (years)"],
|
|
224
226
|
propsByTermId: {}
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
name: "Assay Availability",
|
|
230
|
+
type: "categorical",
|
|
231
|
+
categoryKeys: [
|
|
232
|
+
{ key: "1", shown: true },
|
|
233
|
+
{ key: "2", shown: true }
|
|
234
|
+
],
|
|
235
|
+
termIds: ["assayavailability_cnv", "assayavailability_fusion", "assayavailability_germline"],
|
|
236
|
+
branchIds: [""],
|
|
237
|
+
propsByTermId: {}
|
|
225
238
|
}
|
|
226
239
|
]
|
|
227
240
|
},
|
|
@@ -375,7 +388,11 @@ function termdb_test_default() {
|
|
|
375
388
|
},
|
|
376
389
|
dnaMethylation: {
|
|
377
390
|
file: "files/hg38/TermdbTest/dnaMeth.h5",
|
|
378
|
-
unit: "Average Beta Value"
|
|
391
|
+
unit: "Average Beta Value",
|
|
392
|
+
promoter: {
|
|
393
|
+
file: "files/hg38/TermdbTest/dnaMethPromoterMvalue.h5",
|
|
394
|
+
unit: "M-value"
|
|
395
|
+
}
|
|
379
396
|
},
|
|
380
397
|
topVariablyExpressedGenes: {
|
|
381
398
|
src: "native"
|
|
@@ -421,7 +438,7 @@ function termdb_test_default() {
|
|
|
421
438
|
jsonFile: "files/hg38/TermdbTest/trackLst/facet.json",
|
|
422
439
|
activeTracks: ["bw 1", "bed 1"]
|
|
423
440
|
},
|
|
424
|
-
chat: {}
|
|
441
|
+
chat: { aifiles: "./proteinpaint/server/dataset/ai/termdb.test.json" }
|
|
425
442
|
}
|
|
426
443
|
};
|
|
427
444
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.177.1-0",
|
|
3
|
+
"version": "2.178.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -62,11 +62,11 @@
|
|
|
62
62
|
},
|
|
63
63
|
"dependencies": {
|
|
64
64
|
"@sjcrh/augen": "2.143.0",
|
|
65
|
-
"@sjcrh/proteinpaint-python": "2.
|
|
66
|
-
"@sjcrh/proteinpaint-r": "2.
|
|
67
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
68
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
69
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
65
|
+
"@sjcrh/proteinpaint-python": "2.178.0",
|
|
66
|
+
"@sjcrh/proteinpaint-r": "2.178.0",
|
|
67
|
+
"@sjcrh/proteinpaint-rust": "2.178.0",
|
|
68
|
+
"@sjcrh/proteinpaint-shared": "2.178.0",
|
|
69
|
+
"@sjcrh/proteinpaint-types": "2.178.0",
|
|
70
70
|
"@types/express": "^5.0.0",
|
|
71
71
|
"@types/express-session": "^1.18.1",
|
|
72
72
|
"better-sqlite3": "^12.4.1",
|
package/routes/grin2.js
CHANGED
|
@@ -8,6 +8,7 @@ import os from "os";
|
|
|
8
8
|
import { get_samples } from "#src/termdb.sql.js";
|
|
9
9
|
import { read_file, file_is_readable } from "#src/utils.js";
|
|
10
10
|
import { dtsnvindel, dtcnv, dtfusionrna, dtsv, dt2lesion, optionToDt, formatElapsedTime } from "#shared";
|
|
11
|
+
import { mayFilterByMaf } from "#src/mds3.init.js";
|
|
11
12
|
import crypto from "crypto";
|
|
12
13
|
import { promisify } from "node:util";
|
|
13
14
|
import { exec as execCallback } from "node:child_process";
|
|
@@ -32,6 +33,7 @@ const api = {
|
|
|
32
33
|
};
|
|
33
34
|
function init({ genomes }) {
|
|
34
35
|
return async (req, res) => {
|
|
36
|
+
const signal = req.query.__abortSignal;
|
|
35
37
|
try {
|
|
36
38
|
const request = req.query;
|
|
37
39
|
const g = genomes[request.genome];
|
|
@@ -39,9 +41,13 @@ function init({ genomes }) {
|
|
|
39
41
|
const ds = g.datasets?.[request.dslabel];
|
|
40
42
|
if (!ds) throw new Error("ds missing");
|
|
41
43
|
if (!ds.queries?.singleSampleMutation) throw new Error("singleSampleMutation query missing from dataset");
|
|
42
|
-
const result = await runGrin2WithLimit(g, ds, request);
|
|
44
|
+
const result = await runGrin2WithLimit(g, ds, request, signal);
|
|
43
45
|
res.json(result);
|
|
44
46
|
} catch (e) {
|
|
47
|
+
if (signal?.aborted) {
|
|
48
|
+
mayLog("[GRIN2] Analysis aborted due to client disconnect");
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
45
51
|
console.error("[GRIN2] Error stack:", e.stack);
|
|
46
52
|
const errorResponse = {
|
|
47
53
|
status: "error",
|
|
@@ -93,7 +99,7 @@ async function getMaxLesions() {
|
|
|
93
99
|
return MAX_LESIONS;
|
|
94
100
|
}
|
|
95
101
|
let activeGrin2Jobs = 0;
|
|
96
|
-
async function runGrin2WithLimit(g, ds, request) {
|
|
102
|
+
async function runGrin2WithLimit(g, ds, request, signal) {
|
|
97
103
|
if (activeGrin2Jobs >= GRIN2_CONCURRENCY_LIMIT) {
|
|
98
104
|
const error = new Error(
|
|
99
105
|
`GRIN2 analysis queue is full (${GRIN2_CONCURRENCY_LIMIT} concurrent analyses). Please try again in a few minutes.`
|
|
@@ -105,7 +111,7 @@ async function runGrin2WithLimit(g, ds, request) {
|
|
|
105
111
|
activeGrin2Jobs++;
|
|
106
112
|
mayLog(`[GRIN2] Starting analysis. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
|
|
107
113
|
try {
|
|
108
|
-
return await runGrin2(g, ds, request);
|
|
114
|
+
return await runGrin2(g, ds, request, signal);
|
|
109
115
|
} finally {
|
|
110
116
|
activeGrin2Jobs--;
|
|
111
117
|
mayLog(`[GRIN2] Analysis complete. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
|
|
@@ -136,7 +142,7 @@ function getCnvLesionType(isGain) {
|
|
|
136
142
|
}
|
|
137
143
|
return lesionType.lesionType;
|
|
138
144
|
}
|
|
139
|
-
async function runGrin2(g, ds, request) {
|
|
145
|
+
async function runGrin2(g, ds, request, signal) {
|
|
140
146
|
const startTime = Date.now();
|
|
141
147
|
const samples = await get_samples(
|
|
142
148
|
request,
|
|
@@ -176,7 +182,7 @@ async function runGrin2(g, ds, request) {
|
|
|
176
182
|
pyInput.chromosomelist[c] = g.majorchr[c];
|
|
177
183
|
}
|
|
178
184
|
const grin2AnalysisStart = Date.now();
|
|
179
|
-
const pyResult = await run_python("grin2PpWrapper.py", JSON.stringify(pyInput));
|
|
185
|
+
const pyResult = await run_python("grin2PpWrapper.py", JSON.stringify(pyInput), { signal });
|
|
180
186
|
if (pyResult.stderr?.trim()) {
|
|
181
187
|
mayLog(`[GRIN2] Python stderr: ${pyResult.stderr}`);
|
|
182
188
|
if (pyResult.stderr.includes("ERROR:")) {
|
|
@@ -201,7 +207,7 @@ async function runGrin2(g, ds, request) {
|
|
|
201
207
|
bin_size: request.binSize
|
|
202
208
|
};
|
|
203
209
|
const manhattanPlotStart = Date.now();
|
|
204
|
-
const rsResult = await run_rust("manhattan_plot", JSON.stringify(rustInput));
|
|
210
|
+
const rsResult = await run_rust("manhattan_plot", JSON.stringify(rustInput), [], { signal });
|
|
205
211
|
const manhattanPlotTime = Date.now() - manhattanPlotStart;
|
|
206
212
|
mayLog(`[GRIN2] Manhattan plot generation took ${formatElapsedTime(manhattanPlotTime)}`);
|
|
207
213
|
const manhattanPlotData = JSON.parse(rsResult);
|
|
@@ -431,6 +437,19 @@ function filterAndConvertSnvIndel(sampleName, entry, options) {
|
|
|
431
437
|
if (!Number.isInteger(entry.pos)) {
|
|
432
438
|
return null;
|
|
433
439
|
}
|
|
440
|
+
if (options.mafFilter?.lst?.length) {
|
|
441
|
+
if (!Array.isArray(entry.vafs)) return null;
|
|
442
|
+
const copy = { dt: dtsnvindel };
|
|
443
|
+
for (const v of entry.vafs) {
|
|
444
|
+
copy[v.id] = v.refCount + "," + v.altCount;
|
|
445
|
+
}
|
|
446
|
+
try {
|
|
447
|
+
if (!mayFilterByMaf(options.mafFilter, copy)) return null;
|
|
448
|
+
} catch (e) {
|
|
449
|
+
mayLog("mayFilterByMaf() crashed on a snvindel " + (e instanceof Error ? e.message : String(e)));
|
|
450
|
+
return null;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
434
453
|
const start = entry.pos;
|
|
435
454
|
const end = entry.pos;
|
|
436
455
|
return [sampleName, entry.chr, start, end, dt2lesion[dtsnvindel].lesionTypes[0].lesionType];
|
package/routes/termdb.chat.js
CHANGED
|
@@ -2,6 +2,7 @@ import fs from "fs";
|
|
|
2
2
|
import { ezFetch } from "#shared";
|
|
3
3
|
import { get_samples } from "#src/termdb.sql.js";
|
|
4
4
|
import { ChatPayload } from "#types/checkers";
|
|
5
|
+
import { extractResourceResponse } from "./chat/resource.ts";
|
|
5
6
|
import serverconfig from "../src/serverconfig.js";
|
|
6
7
|
import { mayLog } from "#src/helpers.ts";
|
|
7
8
|
import Database from "better-sqlite3";
|
|
@@ -143,8 +144,15 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
143
144
|
);
|
|
144
145
|
let ai_output_json;
|
|
145
146
|
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
146
|
-
if (class_response.type == "
|
|
147
|
-
ai_output_json =
|
|
147
|
+
if (class_response.type == "none") {
|
|
148
|
+
ai_output_json = {
|
|
149
|
+
type: "text",
|
|
150
|
+
text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
|
|
151
|
+
};
|
|
152
|
+
} else if (class_response.type == "resource") {
|
|
153
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
154
|
+
ai_output_json = await extractResourceResponse(user_prompt, llm, dataset_json);
|
|
155
|
+
mayLog("Time taken for resource agent:", formatElapsedTime(Date.now() - time12));
|
|
148
156
|
} else if (class_response.type == "plot") {
|
|
149
157
|
const classResult = class_response.plot;
|
|
150
158
|
mayLog("classResult:", classResult);
|
|
@@ -174,7 +182,7 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
174
182
|
);
|
|
175
183
|
mayLog("Time taken for DE agent:", formatElapsedTime(Date.now() - time12));
|
|
176
184
|
} else if (classResult == "survival") {
|
|
177
|
-
ai_output_json = { type: "
|
|
185
|
+
ai_output_json = { type: "text", text: "survival agent has not been implemented yet" };
|
|
178
186
|
} else if (classResult == "matrix") {
|
|
179
187
|
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
180
188
|
ai_output_json = await extract_matrix_search_terms_from_query(
|
|
@@ -200,13 +208,10 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
200
208
|
);
|
|
201
209
|
mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
|
|
202
210
|
} else {
|
|
203
|
-
ai_output_json = { type: "
|
|
211
|
+
ai_output_json = { type: "text", text: "Unknown classification value" };
|
|
204
212
|
}
|
|
205
213
|
} else {
|
|
206
|
-
ai_output_json = {
|
|
207
|
-
type: "html",
|
|
208
|
-
html: "Unknown classification type"
|
|
209
|
-
};
|
|
214
|
+
ai_output_json = { type: "text", text: "Unknown classification type" };
|
|
210
215
|
}
|
|
211
216
|
return ai_output_json;
|
|
212
217
|
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { ChatPayload } from "#types/checkers";
|
|
2
|
+
import { classifyQuery } from "./chat/classify1.ts";
|
|
3
|
+
import { classifyNotPlot } from "./chat/classify2.ts";
|
|
4
|
+
import { classifyPlotType } from "./chat/plot.ts";
|
|
5
|
+
import { readJSONFile } from "./chat/utils.ts";
|
|
6
|
+
import { extract_DE_search_terms_from_query } from "./chat/DEagent.ts";
|
|
7
|
+
import { extract_summary_terms } from "./chat/summaryagent.ts";
|
|
8
|
+
import { extract_matrix_search_terms_from_query } from "./chat/matrixagent.ts";
|
|
9
|
+
import { extract_samplescatter_terms_from_query } from "./chat/samplescatteragent.ts";
|
|
10
|
+
import { parse_dataset_db, parse_geneset_db, getGenesetNames } from "./chat/utils.ts";
|
|
11
|
+
import serverconfig from "../src/serverconfig.js";
|
|
12
|
+
import { mayLog } from "#src/helpers.ts";
|
|
13
|
+
import { formatElapsedTime } from "#shared";
|
|
14
|
+
const api = {
|
|
15
|
+
endpoint: "termdb/chat2",
|
|
16
|
+
methods: {
|
|
17
|
+
get: {
|
|
18
|
+
...ChatPayload,
|
|
19
|
+
init
|
|
20
|
+
},
|
|
21
|
+
post: {
|
|
22
|
+
...ChatPayload,
|
|
23
|
+
init
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
function init({ genomes }) {
|
|
28
|
+
return async (req, res) => {
|
|
29
|
+
const q = req.query;
|
|
30
|
+
try {
|
|
31
|
+
const g = genomes[q.genome];
|
|
32
|
+
if (!g) throw "invalid genome";
|
|
33
|
+
const ds = g.datasets?.[q.dslabel];
|
|
34
|
+
if (!ds) throw "invalid dslabel";
|
|
35
|
+
const serverconfig_ds_entries = serverconfig.genomes.find((genome) => genome.name == q.genome).datasets.find((dslabel) => dslabel.name == ds.label);
|
|
36
|
+
if (!serverconfig_ds_entries.aifiles) {
|
|
37
|
+
throw "aifiles are missing for chatbot to work";
|
|
38
|
+
}
|
|
39
|
+
const llm = serverconfig.llm;
|
|
40
|
+
if (!llm) throw "serverconfig.llm is not configured";
|
|
41
|
+
if (llm.provider !== "SJ" && llm.provider !== "ollama") {
|
|
42
|
+
throw "llm.provider must be 'SJ' or 'ollama'";
|
|
43
|
+
}
|
|
44
|
+
const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
|
|
45
|
+
const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
|
|
46
|
+
const aiFilesPath = serverconfig_ds_entries.aifiles;
|
|
47
|
+
const dataset_json = await readJSONFile(aiFilesPath);
|
|
48
|
+
const testing = false;
|
|
49
|
+
const genesetNames = getGenesetNames(g);
|
|
50
|
+
const ai_output_json = await run_chat_pipeline(
|
|
51
|
+
q.prompt,
|
|
52
|
+
llm,
|
|
53
|
+
serverconfig.aiRoute,
|
|
54
|
+
dataset_json,
|
|
55
|
+
testing,
|
|
56
|
+
dataset_db,
|
|
57
|
+
genedb,
|
|
58
|
+
ds,
|
|
59
|
+
genesetNames
|
|
60
|
+
);
|
|
61
|
+
res.send(ai_output_json);
|
|
62
|
+
} catch (e) {
|
|
63
|
+
if (e.stack) mayLog(e.stack);
|
|
64
|
+
res.send({ error: e?.message || e });
|
|
65
|
+
}
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testing, dataset_db, genedb, ds, genesetNames = []) {
|
|
69
|
+
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
70
|
+
const class_response = await classifyQuery(user_prompt, llm);
|
|
71
|
+
let ai_output_json;
|
|
72
|
+
mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
|
|
73
|
+
if (class_response.type == "notplot") {
|
|
74
|
+
const time2 = (/* @__PURE__ */ new Date()).valueOf();
|
|
75
|
+
const notPlotResult = await classifyNotPlot(user_prompt, llm, dataset_json);
|
|
76
|
+
mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
|
|
77
|
+
if (notPlotResult.type == "html") {
|
|
78
|
+
ai_output_json = notPlotResult;
|
|
79
|
+
} else {
|
|
80
|
+
ai_output_json = {
|
|
81
|
+
type: "text",
|
|
82
|
+
text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
|
|
83
|
+
};
|
|
84
|
+
}
|
|
85
|
+
} else if (class_response.type == "plot") {
|
|
86
|
+
const classResult = await classifyPlotType(user_prompt, llm);
|
|
87
|
+
mayLog("classResult:", classResult);
|
|
88
|
+
const dataset_db_output = await parse_dataset_db(dataset_db);
|
|
89
|
+
const genes_list = dataset_json.hasGeneExpression ? await parse_geneset_db(genedb) : [];
|
|
90
|
+
if (classResult == "summary") {
|
|
91
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
92
|
+
ai_output_json = await extract_summary_terms(
|
|
93
|
+
user_prompt,
|
|
94
|
+
llm,
|
|
95
|
+
dataset_db_output,
|
|
96
|
+
dataset_json,
|
|
97
|
+
genes_list,
|
|
98
|
+
ds,
|
|
99
|
+
testing,
|
|
100
|
+
genesetNames
|
|
101
|
+
);
|
|
102
|
+
mayLog("Time taken for summary agent:", formatElapsedTime(Date.now() - time12));
|
|
103
|
+
} else if (classResult == "dge") {
|
|
104
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
105
|
+
ai_output_json = await extract_DE_search_terms_from_query(
|
|
106
|
+
user_prompt,
|
|
107
|
+
llm,
|
|
108
|
+
dataset_db_output,
|
|
109
|
+
dataset_json,
|
|
110
|
+
ds,
|
|
111
|
+
testing
|
|
112
|
+
);
|
|
113
|
+
mayLog("Time taken for DE agent:", formatElapsedTime(Date.now() - time12));
|
|
114
|
+
} else if (classResult == "survival") {
|
|
115
|
+
ai_output_json = { type: "text", text: "survival agent has not been implemented yet" };
|
|
116
|
+
} else if (classResult == "matrix") {
|
|
117
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
118
|
+
ai_output_json = await extract_matrix_search_terms_from_query(
|
|
119
|
+
user_prompt,
|
|
120
|
+
llm,
|
|
121
|
+
dataset_db_output,
|
|
122
|
+
dataset_json,
|
|
123
|
+
genes_list,
|
|
124
|
+
ds,
|
|
125
|
+
testing,
|
|
126
|
+
genesetNames
|
|
127
|
+
);
|
|
128
|
+
mayLog("Time taken for matrix agent:", formatElapsedTime(Date.now() - time12));
|
|
129
|
+
} else if (classResult == "samplescatter") {
|
|
130
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
131
|
+
ai_output_json = await extract_samplescatter_terms_from_query(
|
|
132
|
+
user_prompt,
|
|
133
|
+
llm,
|
|
134
|
+
dataset_db_output,
|
|
135
|
+
dataset_json,
|
|
136
|
+
genes_list,
|
|
137
|
+
ds,
|
|
138
|
+
testing,
|
|
139
|
+
genesetNames
|
|
140
|
+
);
|
|
141
|
+
mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
|
|
142
|
+
} else {
|
|
143
|
+
ai_output_json = { type: "text", text: "Unknown classification value" };
|
|
144
|
+
}
|
|
145
|
+
} else {
|
|
146
|
+
ai_output_json = { type: "text", text: "Unknown classification type" };
|
|
147
|
+
}
|
|
148
|
+
return ai_output_json;
|
|
149
|
+
}
|
|
150
|
+
export {
|
|
151
|
+
api,
|
|
152
|
+
run_chat_pipeline
|
|
153
|
+
};
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -44,6 +44,13 @@ function init({ genomes }) {
|
|
|
44
44
|
if (q.terms.length < 3)
|
|
45
45
|
throw `A minimum of three genes is required for clustering. Please refresh this page to clear this error.`;
|
|
46
46
|
result = await getResult(q, ds);
|
|
47
|
+
} else if (TermTypes.WHOLE_PROTEOME_ABUNDANCE == q.dataType) {
|
|
48
|
+
if (!ds.queries?.proteome?.whole) throw `no ${TermTypes.WHOLE_PROTEOME_ABUNDANCE} data on this dataset`;
|
|
49
|
+
if (!q.terms) throw `missing gene list`;
|
|
50
|
+
if (!Array.isArray(q.terms)) throw `gene list is not an array`;
|
|
51
|
+
if (q.terms.length < 3)
|
|
52
|
+
throw `A minimum of three genes is required for clustering. Please refresh this page to clear this error.`;
|
|
53
|
+
result = await getResult(q, ds);
|
|
47
54
|
} else {
|
|
48
55
|
throw "unknown q.dataType " + q.dataType;
|
|
49
56
|
}
|
|
@@ -68,6 +75,9 @@ async function getResult(q, ds) {
|
|
|
68
75
|
if (q.dataType == NUMERIC_DICTIONARY_TERM) {
|
|
69
76
|
;
|
|
70
77
|
({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
|
|
78
|
+
} else if (q.dataType == TermTypes.WHOLE_PROTEOME_ABUNDANCE) {
|
|
79
|
+
;
|
|
80
|
+
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.whole.get(_q, ds));
|
|
71
81
|
} else {
|
|
72
82
|
;
|
|
73
83
|
({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries[q.dataType].get(_q, ds));
|
package/routes/termdb.config.js
CHANGED
|
@@ -72,7 +72,7 @@ function make(q, req, res, ds, genome) {
|
|
|
72
72
|
if (tdb.survival) c.survival = tdb.survival;
|
|
73
73
|
if (tdb.regression) c.regression = tdb.regression;
|
|
74
74
|
if (tdb.uiLabels) c.uiLabels = tdb.uiLabels;
|
|
75
|
-
if (tdb.
|
|
75
|
+
if (tdb.termCollections) c.termCollections = tdb.termCollections;
|
|
76
76
|
if (ds.assayAvailability) c.assayAvailability = ds.assayAvailability;
|
|
77
77
|
if (ds.cohort.correlationVolcano) c.correlationVolcano = ds.cohort.correlationVolcano;
|
|
78
78
|
if (ds.cohort.boxplots) c.boxplots = ds.cohort.boxplots;
|
|
@@ -124,8 +124,8 @@ function addMatrixplots(c, ds) {
|
|
|
124
124
|
});
|
|
125
125
|
}
|
|
126
126
|
function addMutationSignatureplots(c, ds) {
|
|
127
|
-
const mutationSignatureplots = ds.cohort.termdb.
|
|
128
|
-
(
|
|
127
|
+
const mutationSignatureplots = ds.cohort.termdb.termCollections?.find(
|
|
128
|
+
(tc) => tc.name == "Mutation Signature" && tc.type === "numeric"
|
|
129
129
|
)?.plots;
|
|
130
130
|
if (!mutationSignatureplots) return;
|
|
131
131
|
c.mutationSignatureplots = mutationSignatureplots.map((p) => {
|
|
@@ -191,6 +191,9 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
191
191
|
}
|
|
192
192
|
if (q.dnaMethylation) {
|
|
193
193
|
q2.dnaMethylation = { unit: q.dnaMethylation.unit };
|
|
194
|
+
if (q.dnaMethylation.promoter) {
|
|
195
|
+
q2.dnaMethylation.promoter = { unit: q.dnaMethylation.promoter.unit };
|
|
196
|
+
}
|
|
194
197
|
}
|
|
195
198
|
if (q.ld) {
|
|
196
199
|
q2.ld = structuredClone(q.ld);
|
|
@@ -251,6 +254,9 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
251
254
|
if (q.singleCell.DEgenes) {
|
|
252
255
|
q2.singleCell.DEgenes = { termId: q.singleCell.DEgenes.termId };
|
|
253
256
|
}
|
|
257
|
+
if (q.singleCell?.terms?.length) {
|
|
258
|
+
c.scctTerms = q.singleCell.terms;
|
|
259
|
+
}
|
|
254
260
|
}
|
|
255
261
|
if (q.images) {
|
|
256
262
|
q2.images = {};
|
|
@@ -266,9 +272,14 @@ function getAllowedTermTypes(ds) {
|
|
|
266
272
|
}
|
|
267
273
|
if (ds.queries?.geneExpression) typeSet.add(TermTypes.GENE_EXPRESSION);
|
|
268
274
|
if (ds.queries?.metaboliteIntensity) typeSet.add(TermTypes.METABOLITE_INTENSITY);
|
|
275
|
+
if (ds.queries?.proteome?.whole) typeSet.add(TermTypes.WHOLE_PROTEOME_ABUNDANCE);
|
|
269
276
|
if (ds.queries?.ssGSEA) typeSet.add(TermTypes.SSGSEA);
|
|
270
277
|
if (ds.queries?.dnaMethylation) typeSet.add(TermTypes.DNA_METHYLATION);
|
|
271
|
-
if (ds.
|
|
278
|
+
if (ds.queries?.singleCell) {
|
|
279
|
+
typeSet.add(TermTypes.SINGLECELL_CELLTYPE);
|
|
280
|
+
if (ds.queries.singleCell?.geneExpression) typeSet.add(TermTypes.SINGLECELL_GENE_EXPRESSION);
|
|
281
|
+
}
|
|
282
|
+
if (ds.cohort.termdb.termCollections?.length) typeSet.add("termCollection");
|
|
272
283
|
return [...typeSet];
|
|
273
284
|
}
|
|
274
285
|
function getSelectCohort(ds, req) {
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import { diffMethPayload } from "#types/checkers";
|
|
2
|
+
import { getData } from "../src/termdb.matrix.js";
|
|
3
|
+
import { get_ds_tdb } from "../src/termdb.js";
|
|
4
|
+
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
5
|
+
import { mayLog } from "#src/helpers.ts";
|
|
6
|
+
import { formatElapsedTime } from "#shared";
|
|
7
|
+
const api = {
|
|
8
|
+
endpoint: "termdb/diffMeth",
|
|
9
|
+
methods: {
|
|
10
|
+
get: {
|
|
11
|
+
...diffMethPayload,
|
|
12
|
+
init
|
|
13
|
+
},
|
|
14
|
+
post: {
|
|
15
|
+
...diffMethPayload,
|
|
16
|
+
init
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
};
|
|
20
|
+
function init({ genomes }) {
|
|
21
|
+
return async (req, res) => {
|
|
22
|
+
try {
|
|
23
|
+
const q = req.query;
|
|
24
|
+
const genome = genomes[q.genome];
|
|
25
|
+
if (!genome) throw new Error("invalid genome");
|
|
26
|
+
const [ds] = get_ds_tdb(genome, q);
|
|
27
|
+
let term_results = [];
|
|
28
|
+
if (q.tw) {
|
|
29
|
+
term_results = await getData({ filter: q.filter, filter0: q.filter0, terms: [q.tw] }, ds);
|
|
30
|
+
if (term_results.error) throw new Error(term_results.error);
|
|
31
|
+
}
|
|
32
|
+
let term_results2 = [];
|
|
33
|
+
if (q.tw2) {
|
|
34
|
+
term_results2 = await getData({ filter: q.filter, filter0: q.filter0, terms: [q.tw2] }, ds);
|
|
35
|
+
if (term_results2.error) throw new Error(term_results2.error);
|
|
36
|
+
}
|
|
37
|
+
const results = await run_diffMeth(req.query, ds, term_results, term_results2);
|
|
38
|
+
if (!results || !results.data) throw new Error("No data available");
|
|
39
|
+
res.send(results);
|
|
40
|
+
} catch (e) {
|
|
41
|
+
res.send({ status: "error", error: e.message || e });
|
|
42
|
+
if (e instanceof Error && e.stack) console.log(e);
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
async function run_diffMeth(param, ds, term_results, term_results2) {
|
|
47
|
+
if (param.samplelst?.groups?.length != 2) throw new Error(".samplelst.groups.length!=2");
|
|
48
|
+
if (param.samplelst.groups[0].values?.length < 1) throw new Error("samplelst.groups[0].values.length<1");
|
|
49
|
+
if (param.samplelst.groups[1].values?.length < 1) throw new Error("samplelst.groups[1].values.length<1");
|
|
50
|
+
const q = ds.queries.dnaMethylation?.promoter;
|
|
51
|
+
if (!q) throw new Error("ds.queries.dnaMethylation.promoter is not configured");
|
|
52
|
+
if (!q.file) throw new Error("ds.queries.dnaMethylation.promoter.file is missing");
|
|
53
|
+
const group1names = [];
|
|
54
|
+
const conf1_group1 = [];
|
|
55
|
+
const conf2_group1 = [];
|
|
56
|
+
for (const s of param.samplelst.groups[0].values) {
|
|
57
|
+
if (!Number.isInteger(s.sampleId)) continue;
|
|
58
|
+
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
59
|
+
if (!n) continue;
|
|
60
|
+
if (!q.allSampleSet.has(n)) continue;
|
|
61
|
+
if (param.tw && param.tw2) {
|
|
62
|
+
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
63
|
+
conf1_group1.push(
|
|
64
|
+
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
65
|
+
);
|
|
66
|
+
conf2_group1.push(
|
|
67
|
+
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
68
|
+
);
|
|
69
|
+
group1names.push(n);
|
|
70
|
+
}
|
|
71
|
+
} else if (param.tw && !param.tw2) {
|
|
72
|
+
if (term_results.samples[s.sampleId]) {
|
|
73
|
+
conf1_group1.push(
|
|
74
|
+
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
75
|
+
);
|
|
76
|
+
group1names.push(n);
|
|
77
|
+
}
|
|
78
|
+
} else if (!param.tw && param.tw2) {
|
|
79
|
+
if (term_results2.samples[s.sampleId]) {
|
|
80
|
+
conf2_group1.push(
|
|
81
|
+
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
82
|
+
);
|
|
83
|
+
group1names.push(n);
|
|
84
|
+
}
|
|
85
|
+
} else {
|
|
86
|
+
group1names.push(n);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
const group2names = [];
|
|
90
|
+
const conf1_group2 = [];
|
|
91
|
+
const conf2_group2 = [];
|
|
92
|
+
for (const s of param.samplelst.groups[1].values) {
|
|
93
|
+
if (!Number.isInteger(s.sampleId)) continue;
|
|
94
|
+
const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
|
|
95
|
+
if (!n) continue;
|
|
96
|
+
if (!q.allSampleSet.has(n)) continue;
|
|
97
|
+
if (param.tw && param.tw2) {
|
|
98
|
+
if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
|
|
99
|
+
conf1_group2.push(
|
|
100
|
+
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
101
|
+
);
|
|
102
|
+
conf2_group2.push(
|
|
103
|
+
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
104
|
+
);
|
|
105
|
+
group2names.push(n);
|
|
106
|
+
}
|
|
107
|
+
} else if (param.tw && !param.tw2) {
|
|
108
|
+
if (term_results.samples[s.sampleId]) {
|
|
109
|
+
conf1_group2.push(
|
|
110
|
+
param.tw.q.mode == "continuous" ? term_results.samples[s.sampleId][param.tw.$id]["value"] : term_results.samples[s.sampleId][param.tw.$id]["key"]
|
|
111
|
+
);
|
|
112
|
+
group2names.push(n);
|
|
113
|
+
}
|
|
114
|
+
} else if (!param.tw && param.tw2) {
|
|
115
|
+
if (term_results2.samples[s.sampleId]) {
|
|
116
|
+
conf2_group2.push(
|
|
117
|
+
param.tw2.q.mode == "continuous" ? term_results2.samples[s.sampleId][param.tw2.$id]["value"] : term_results2.samples[s.sampleId][param.tw2.$id]["key"]
|
|
118
|
+
);
|
|
119
|
+
group2names.push(n);
|
|
120
|
+
}
|
|
121
|
+
} else {
|
|
122
|
+
group2names.push(n);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
const sample_size1 = group1names.length;
|
|
126
|
+
const sample_size2 = group2names.length;
|
|
127
|
+
const alerts = validateGroups(sample_size1, sample_size2, group1names, group2names);
|
|
128
|
+
if (param.preAnalysis) {
|
|
129
|
+
const group1Name = param.samplelst.groups[0].name;
|
|
130
|
+
const group2Name = param.samplelst.groups[1].name;
|
|
131
|
+
return {
|
|
132
|
+
data: {
|
|
133
|
+
[group1Name]: sample_size1,
|
|
134
|
+
[group2Name]: sample_size2,
|
|
135
|
+
...alerts.length ? { alert: alerts.join(" | ") } : {}
|
|
136
|
+
}
|
|
137
|
+
};
|
|
138
|
+
}
|
|
139
|
+
if (alerts.length) throw new Error(alerts.join(" | "));
|
|
140
|
+
const diffMethInput = {
|
|
141
|
+
case: group2names.join(","),
|
|
142
|
+
control: group1names.join(","),
|
|
143
|
+
input_file: q.file,
|
|
144
|
+
min_samples_per_group: param.min_samples_per_group
|
|
145
|
+
};
|
|
146
|
+
if (param.tw) {
|
|
147
|
+
diffMethInput.conf1 = [...conf1_group2, ...conf1_group1];
|
|
148
|
+
diffMethInput.conf1_mode = param.tw.q.mode;
|
|
149
|
+
if (new Set(diffMethInput.conf1).size === 1) {
|
|
150
|
+
throw new Error("Confounding variable 1 has only one value");
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
if (param.tw2) {
|
|
154
|
+
diffMethInput.conf2 = [...conf2_group2, ...conf2_group1];
|
|
155
|
+
diffMethInput.conf2_mode = param.tw2.q.mode;
|
|
156
|
+
if (new Set(diffMethInput.conf2).size === 1) {
|
|
157
|
+
throw new Error("Confounding variable 2 has only one value");
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
const time1 = Date.now();
|
|
161
|
+
const result = JSON.parse(await run_R("diffMeth.R", JSON.stringify(diffMethInput)));
|
|
162
|
+
mayLog("Time taken to run diffMeth:", formatElapsedTime(Date.now() - time1));
|
|
163
|
+
const output = {
|
|
164
|
+
data: result.promoter_data,
|
|
165
|
+
sample_size1,
|
|
166
|
+
sample_size2
|
|
167
|
+
};
|
|
168
|
+
return output;
|
|
169
|
+
}
|
|
170
|
+
function validateGroups(sample_size1, sample_size2, group1names, group2names) {
|
|
171
|
+
const alerts = [];
|
|
172
|
+
if (sample_size1 < 1) alerts.push("sample size of group1 < 1");
|
|
173
|
+
if (sample_size2 < 1) alerts.push("sample size of group2 < 1");
|
|
174
|
+
const commonnames = group1names.filter((x) => group2names.includes(x));
|
|
175
|
+
if (commonnames.length) alerts.push(`Common elements found between both groups: ${commonnames.join(", ")}`);
|
|
176
|
+
return alerts;
|
|
177
|
+
}
|
|
178
|
+
export {
|
|
179
|
+
api
|
|
180
|
+
};
|