@sjcrh/proteinpaint-server 2.183.2-0 → 2.184.1-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/profile.barchart2.js +114 -0
- package/routes/termdb.chat3.js +191 -0
- package/routes/termdb.config.js +11 -7
- package/routes/termdb.proteome.js +72 -42
- package/src/app.js +2383 -1934
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.184.1-0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -66,7 +66,7 @@
|
|
|
66
66
|
"@sjcrh/proteinpaint-r": "2.181.0",
|
|
67
67
|
"@sjcrh/proteinpaint-rust": "2.183.0",
|
|
68
68
|
"@sjcrh/proteinpaint-shared": "2.183.0",
|
|
69
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
69
|
+
"@sjcrh/proteinpaint-types": "2.184.1-0",
|
|
70
70
|
"@types/express": "^5.0.0",
|
|
71
71
|
"@types/express-session": "^1.18.1",
|
|
72
72
|
"better-sqlite3": "^12.4.1",
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { ProfileScoresPayload } from "#types/checkers";
|
|
2
|
+
import { getData } from "../src/termdb.matrix.js";
|
|
3
|
+
/** Route definition for termdb/profileBarchart2Scores; GET and POST share the
 * same payload checker and the same handler factory. */
const api = {
	endpoint: "termdb/profileBarchart2Scores",
	methods: {
		get: { ...ProfileScoresPayload, init },
		post: { ...ProfileScoresPayload, init }
	}
};
|
|
16
|
+
/**
 * Builds the Express handler for this route.
 * Looks up the genome and dataset from the validated query, delegates the
 * computation to getScores(), and sends either the result or a
 * { status: "error", error } payload.
 */
function init({ genomes }) {
	const handler = async (req, res) => {
		try {
			const query = req.query;
			const g = genomes[query.genome];
			if (!g) throw "invalid genome name";
			const ds = g.datasets?.[query.dslabel];
			res.send(await getScores(query, ds));
		} catch (e) {
			console.log(e);
			res.send({ status: "error", error: e.message || e });
		}
	};
	return handler;
}
|
|
30
|
+
/**
 * Derives the cohort prefix ("F" or "A") from the first score term's id,
 * falling back to the ids of the terms used in the request filter.
 * Throws when no candidate id starts with either prefix.
 */
function derivePrefix(query) {
	// Map a term id to its cohort prefix, or null when it matches neither
	const prefixOf = id => (id?.startsWith("F") ? "F" : id?.startsWith("A") ? "A" : null);
	const fromScore = prefixOf(query.scoreTerms?.[0]?.score?.term?.id);
	if (fromScore) return fromScore;
	for (const entry of query.filter?.lst || []) {
		const fromFilter = prefixOf(entry.tvs?.term?.id);
		if (fromFilter) return fromFilter;
	}
	throw "cannot determine cohort prefix from scoreTerms or filter term IDs";
}
|
|
41
|
+
/**
 * Computes per-term median score percentages for the profile barchart.
 *
 * @param {object} query - validated request query; requires __protected__
 *   ({ activeCohort, clientAuthResult, ignoredTermIds }), scoreTerms[] and
 *   optional filter / filterByUserSites
 * @param {object} ds - dataset object, passed through to getData()
 * @returns {Promise<{term2Score: object, sites: Array, n: number}>}
 * @throws when the cohort prefix cannot be derived or getData() reports an error
 */
async function getScores(query, ds) {
	const { activeCohort, clientAuthResult } = query.__protected__;
	// "F"/"A" cohort prefix selects which facility (site) term variant to load
	const prefix = derivePrefix(query);
	const facilityTermId = `${prefix}UNIT`;
	const facilityTW = { term: { id: facilityTermId }, q: {} };
	const terms = [facilityTW];
	for (const t of query.scoreTerms) {
		terms.push(t.score);
		// maxScore may be a constant number instead of a term wrapper; only term wrappers are fetched
		if (t.maxScore?.term) terms.push(t.maxScore);
	}
	if (!query.filterByUserSites) {
		// facility term is only needed for site filtering; exclude it from protected-term checks otherwise
		query.__protected__.ignoredTermIds.push(facilityTermId);
	}
	const cohortAuth = clientAuthResult[activeCohort];
	const isPublic = !cohortAuth?.role || cohortAuth.role === "public";
	const userSites = cohortAuth?.sites;
	// getData() is expected to annotate each tw with a $id used below to read
	// per-sample values — TODO confirm against termdb.matrix.getData
	const raw = await getData(
		{
			terms,
			filter: query.filter,
			__protected__: query.__protected__
		},
		ds
	);
	if (raw.error) throw raw.error;
	// fix: the original materialized Object.values(raw.samples) twice (sampleList and samples)
	const samples = Object.values(raw.samples);
	let sites = [];
	for (const s of samples) {
		// fix: the original crashed with a TypeError when a sample had no facility annotation
		const val = s[facilityTW.$id]?.value;
		if (val == null) continue;
		let label = facilityTW.term.values?.[val]?.label || val;
		if (label.length > 50) label = label.slice(0, 47) + "...";
		sites.push({ value: val, label });
	}
	// NOTE(review): one entry is emitted per sample, so site values may repeat —
	// confirm whether the client expects a deduplicated list
	if (userSites && query.filterByUserSites) {
		sites = sites.filter(s => userSites.includes(s.value));
	}
	sites.sort((a, b) => a.label.localeCompare(b.label));
	const eligibleSamples =
		userSites && query.filterByUserSites
			? samples.filter(s => userSites.includes(s[facilityTW.$id]?.value))
			: samples;
	const term2Score = {};
	for (const d of query.scoreTerms) {
		const score = computeMedianPercentage(d, eligibleSamples);
		if (score !== null) term2Score[d.score.term.id] = score;
	}
	return {
		term2Score,
		// Public users see only aggregated scores — do not expose site IDs or names
		sites: isPublic ? [] : sites,
		n: eligibleSamples.length
	};
}
|
|
91
|
+
/**
 * Computes the median of per-sample score percentages (score / maxScore * 100),
 * rounded to the nearest integer.
 *
 * @param {object} d - score term config: d.score.$id indexes the sample's score
 *   value; d.maxScore is either a constant number or a term wrapper with a $id
 * @param {Array<object>} samples - per-sample annotation objects keyed by term $id
 * @returns {number|null} rounded median percentage, or null when no sample
 *   yields a valid (score, maxScore) pair
 */
function computeMedianPercentage(d, samples) {
	const percentages = [];
	for (const s of samples) {
		const scoreValue = s[d.score.$id]?.value;
		if (scoreValue == null) continue;
		// maxScore may be a fixed number shared by all samples, or a per-sample term
		// value. fix: optional chaining guards a missing maxScore config — the
		// original threw a TypeError on d.maxScore.$id when maxScore was undefined.
		const maxScoreValue = typeof d.maxScore === "number" ? d.maxScore : s[d.maxScore?.$id]?.value;
		// skip samples whose denominator is missing or zero (avoid division by zero)
		if (maxScoreValue == null || maxScoreValue === 0) continue;
		percentages.push((scoreValue / maxScoreValue) * 100);
	}
	if (percentages.length === 0) return null;
	percentages.sort((a, b) => a - b);
	const mid = Math.floor(percentages.length / 2);
	// odd count: middle element; even count: mean of the two middle elements
	const median = percentages.length % 2 !== 0 ? percentages[mid] : (percentages[mid - 1] + percentages[mid]) / 2;
	return Math.round(median);
}
|
|
112
|
+
export {
|
|
113
|
+
api
|
|
114
|
+
};
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { ChatPayload } from "#types/checkers";
|
|
2
|
+
import { mayLog } from "#src/helpers.ts";
|
|
3
|
+
import { formatElapsedTime } from "#shared";
|
|
4
|
+
import { readJSONFile, parse_geneset_db } from "./chat/utils.ts";
|
|
5
|
+
import { classifyQuery } from "./chat/classify1.ts";
|
|
6
|
+
import { classifyPlotType } from "./chat/plot.ts";
|
|
7
|
+
import { classifyNotPlot } from "./chat/classify2.ts";
|
|
8
|
+
import { inferScaffold } from "./chat/scaffold.ts";
|
|
9
|
+
import serverconfig from "../src/serverconfig.js";
|
|
10
|
+
import { getDsAllowedTermTypes } from "./termdb.config.ts";
|
|
11
|
+
import { phrase2entity } from "./chat/phrase2entity.ts";
|
|
12
|
+
import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
|
|
13
|
+
import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
|
|
14
|
+
import path from "path";
|
|
15
|
+
import fs from "fs";
|
|
16
|
+
import { resolveToPlotState } from "./chat/scaffold2state.ts";
|
|
17
|
+
/** Route definition for termdb/chat3; GET and POST share the same payload
 * checker and the same handler factory. */
const api = {
	endpoint: "termdb/chat3",
	methods: {
		get: { ...ChatPayload, init },
		post: { ...ChatPayload, init }
	}
};
|
|
30
|
+
/**
 * Builds the Express handler for the termdb/chat3 route.
 * Validates genome/dataset, loads the dataset's AI agent files, checks the llm
 * config, derives the active cohort key from the request filter, then runs the
 * chat pipeline and sends its JSON result. Errors are returned as { error }.
 */
function init({ genomes }) {
	return async (req, res) => {
		const q = req.query;
		try {
			const g = genomes[q.genome];
			if (!g) throw "invalid genome";
			const ds = g.datasets?.[q.dslabel];
			if (!ds) throw "invalid dslabel";
			// agent definition files live under dataset/ai/<dslabel>, one dir per dataset
			const aiFilesDir = serverconfig.binpath + "/../../dataset/ai/" + q.dslabel;
			let agentFiles = [];
			try {
				// fix: readdirSync is synchronous — the original pointlessly awaited its return value
				agentFiles = fs.readdirSync(aiFilesDir).filter(file => file.endsWith(".json"));
			} catch (err) {
				if (err.code === "ENOENT") throw new Error(`Directory not found: ${aiFilesDir}`);
				if (err.code === "ENOTDIR") throw new Error(`Path is not a directory: ${aiFilesDir}`);
				throw err;
			}
			const llm = serverconfig.llm;
			if (!llm) throw "serverconfig.llm is not configured";
			if (!["SJ", "ollama", "huggingface", "azure"].includes(llm.provider)) {
				throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
			}
			// filter may arrive JSON-encoded; normalize to an object with a lst[] array
			const rawFilter = typeof q.filter === "string" ? JSON.parse(q.filter) : q.filter;
			const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
			const lst = Array.isArray(filter.lst) ? filter.lst : [];
			const cohortFilter = lst.find(item => item.tag === "cohortFilter");
			const cohortKey = cohortFilter ? cohortFilter.tvs.values[0].key : "";
			// chart types allowed for this cohort; used by the pipeline to validate the inferred plot type
			const supportedChartTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
			const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
			const _allowedTermTypes = getDsAllowedTermTypes(ds);
			const ai_output_json = await run_chat_pipeline(
				q.prompt,
				llm,
				ds,
				genedb,
				agentFiles,
				aiFilesDir,
				supportedChartTypes,
				_allowedTermTypes
				// testing
			);
			mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
			res.send(ai_output_json);
		} catch (e) {
			if (e.stack) mayLog(e.stack);
			res.send({ error: e?.message || e });
		}
	};
}
|
|
79
|
+
// Runs the multi-phase LLM chat pipeline for a user prompt:
//   phase 0: classify the prompt as "plot" vs "notplot"
//   non-plot: delegate to agent files (may return canned html) or a fallback text
//   plot: validate the inferred plot type against supportedChartTypes, then
//     scaffold -> phrases-to-entities -> entities-to-term-objects ->
//     term-objects-to-tw/tvs -> plot state.
// Returns a response object ({ type: "text"|"html", ... } or a plot state).
// NOTE(review): _allowedTermTypes is accepted but never read here — presumably
// reserved for a later phase; confirm with callers.
async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedChartTypes, _allowedTermTypes) {
  // main.json describes the dataset for the AI agents; required for any pipeline run
  if (!fs.existsSync(path.join(aiFilesDir, "main.json")))
    throw "Main data file is not specified for dataset:" + ds.label;
  const dataset_json = await readJSONFile(path.join(aiFilesDir, "main.json"));
  const time1 = (/* @__PURE__ */ new Date()).valueOf();
  // Phase 0: does the prompt ask for a plot at all?
  const class_response = await classifyQuery(user_prompt, llm);
  mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
  let ai_output_json;
  if (class_response.type == "notplot") {
    const time2 = (/* @__PURE__ */ new Date()).valueOf();
    // Non-plot prompts are routed through the dataset's agent files
    const notPlotResult = await classifyNotPlot(user_prompt, llm, agentFiles, aiFilesDir);
    mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
    if (notPlotResult.type == "html") {
      ai_output_json = notPlotResult;
    } else {
      // fallback when the prompt matches neither a plot nor an agent topic
      ai_output_json = {
        type: "text",
        text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
      };
    }
  } else if (class_response.type == "plot") {
    let time = (/* @__PURE__ */ new Date()).valueOf();
    const plotType = await classifyPlotType(user_prompt, llm);
    mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));
    // Without a supported-chart-types list we cannot validate; bail out with a text response
    if (!supportedChartTypes) {
      const errorMsg = "Supported chart types list is undefined. Please check the dataset configuration and ensure that getSupportedChartTypes is implemented correctly. Skipping chart type validation, but this may lead to unsupported chart type errors downstream.";
      console.warn(errorMsg);
      const errorResponse = {
        type: "text",
        text: errorMsg
      };
      return errorResponse;
    }
    // Validate the inferred plot type. "summary" is backed by the "dictionary"
    // chart and "dge" by "DA"; every other plot type is matched by its own name.
    if (plotType === "summary") {
      if (!supportedChartTypes.includes("dictionary")) {
        const log = 'Plot type: "' + plotType + '" is not supported.';
        ai_output_json = {
          type: "text",
          text: log
        };
        mayLog(log);
        return ai_output_json;
      }
    } else if (plotType === "dge") {
      if (!supportedChartTypes.includes("DA")) {
        const log = 'Plot type: "' + plotType + '" is not supported.';
        ai_output_json = {
          type: "text",
          text: log
        };
        mayLog(log);
        return ai_output_json;
      }
    } else {
      mayLog(`Supported chart types for this cohort: ${supportedChartTypes}`);
      if (!supportedChartTypes.includes(plotType)) {
        const log = 'Plot type: "' + plotType + '" is not supported.';
        ai_output_json = {
          type: "text",
          text: log
        };
        mayLog(log);
        return ai_output_json;
      }
    }
    mayLog("####### First phase: Infer Plot Scaffolds #######");
    time = (/* @__PURE__ */ new Date()).valueOf();
    const scaffoldResult = await inferScaffold(user_prompt, plotType, llm);
    mayLog("ScaffoldResult: ", scaffoldResult);
    mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
    if (!scaffoldResult)
      throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
    // Summary plots carry an inner chart type to instantiate later
    const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;
    mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
    const genes_list = await parse_geneset_db(genedb);
    time = (/* @__PURE__ */ new Date()).valueOf();
    const phrase2entityResult = await phrase2entity(scaffoldResult, plotType, llm, genes_list, dataset_json, ds);
    mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
    // Agents signal an unrecoverable issue by returning a { type: "text" } message;
    // propagate it to the client as-is
    if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
      return phrase2entityResult;
    }
    mayLog(phrase2entityResult);
    mayLog("####### Third phase: From Entities infer Term Objects #######");
    const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
    time = (/* @__PURE__ */ new Date()).valueOf();
    const termObj = await inferTermObjFromEntity(
      phrase2entityResult,
      plotType,
      llm,
      dataset_db,
      genes_list
    );
    mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
    mayLog("Inferred termObj from entity:", JSON.stringify(termObj));
    mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
    time = (/* @__PURE__ */ new Date()).valueOf();
    const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db);
    mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
    // Same text-message escape hatch as above
    if ("type" in twTvsObj && twTvsObj.type === "text") {
      return twTvsObj;
    }
    mayLog("twTvsObj:", twTvsObj);
    mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
    time = (/* @__PURE__ */ new Date()).valueOf();
    ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
    mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
  }
  // NOTE(review): when class_response.type is neither "notplot" nor "plot",
  // this returns undefined — confirm classifyQuery can only yield those two types
  return ai_output_json;
}
|
|
188
|
+
export {
|
|
189
|
+
api,
|
|
190
|
+
run_chat_pipeline
|
|
191
|
+
};
|
package/routes/termdb.config.js
CHANGED
|
@@ -47,7 +47,7 @@ function make(q, req, res, ds, genome) {
|
|
|
47
47
|
selectCohort: getSelectCohort(ds, req),
|
|
48
48
|
supportedChartTypes: tdb.q?.getSupportedChartTypes(req),
|
|
49
49
|
renamedChartTypes: ds.cohort.renamedChartTypes,
|
|
50
|
-
allowedTermTypes:
|
|
50
|
+
allowedTermTypes: getDsAllowedTermTypes(ds),
|
|
51
51
|
massSessionDuration: serverconfig.features.massSessionDuration || 30,
|
|
52
52
|
dataDownloadCatch: tdb.dataDownloadCatch,
|
|
53
53
|
matrix: tdb.matrix,
|
|
@@ -194,6 +194,9 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
194
194
|
}
|
|
195
195
|
if (q.proteome) {
|
|
196
196
|
q2.proteome = {};
|
|
197
|
+
if (q.proteome.overlayTerm) {
|
|
198
|
+
q2.proteome.overlayTerm = JSON.parse(JSON.stringify(q.proteome.overlayTerm));
|
|
199
|
+
}
|
|
197
200
|
if (q.proteome.assays) {
|
|
198
201
|
q2.proteome.assays = {};
|
|
199
202
|
for (const assay in q.proteome.assays) {
|
|
@@ -203,11 +206,11 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
203
206
|
for (const cohort in q.proteome.assays[assay].cohorts) {
|
|
204
207
|
q2.proteome.assays[assay].cohorts[cohort] = {};
|
|
205
208
|
const src = q.proteome.assays[assay].cohorts[cohort];
|
|
206
|
-
if ("
|
|
207
|
-
q2.proteome.assays[assay].cohorts[cohort].
|
|
209
|
+
if ("controlFilter" in src) {
|
|
210
|
+
q2.proteome.assays[assay].cohorts[cohort].controlFilter = JSON.parse(JSON.stringify(src.controlFilter));
|
|
208
211
|
}
|
|
209
|
-
if ("
|
|
210
|
-
q2.proteome.assays[assay].cohorts[cohort].
|
|
212
|
+
if ("caseFilter" in src) {
|
|
213
|
+
q2.proteome.assays[assay].cohorts[cohort].caseFilter = JSON.parse(JSON.stringify(src.caseFilter));
|
|
211
214
|
}
|
|
212
215
|
}
|
|
213
216
|
}
|
|
@@ -290,7 +293,7 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
290
293
|
q2.images = {};
|
|
291
294
|
}
|
|
292
295
|
}
|
|
293
|
-
function
|
|
296
|
+
function getDsAllowedTermTypes(ds) {
|
|
294
297
|
const typeSet = /* @__PURE__ */ new Set();
|
|
295
298
|
for (const r of ds.cohort.termdb.termtypeByCohort) {
|
|
296
299
|
if (r.termType) typeSet.add(r.termType);
|
|
@@ -326,5 +329,6 @@ function getSelectCohort(ds, req) {
|
|
|
326
329
|
return copy;
|
|
327
330
|
}
|
|
328
331
|
export {
|
|
329
|
-
api
|
|
332
|
+
api,
|
|
333
|
+
getDsAllowedTermTypes
|
|
330
334
|
};
|
|
@@ -28,16 +28,11 @@ function init({ genomes }) {
|
|
|
28
28
|
const cohorts = [];
|
|
29
29
|
for (const assayName in ds.queries.proteome.assays) {
|
|
30
30
|
const assay = ds.queries.proteome.assays[assayName];
|
|
31
|
-
for (const
|
|
31
|
+
for (const cohortName in assay.cohorts || {}) {
|
|
32
32
|
const details = {
|
|
33
33
|
dbfile: ds.queries.proteome.dbfile,
|
|
34
|
-
assayName,
|
|
35
|
-
|
|
36
|
-
cohortControlFilter: cohort.controlFilter,
|
|
37
|
-
cohortCaseFilter: cohort.caseFilter,
|
|
38
|
-
PTMType: assay.PTMType,
|
|
39
|
-
assayColumnIdx: assay.columnIdx,
|
|
40
|
-
assayColumnValue: assay.columnValue
|
|
34
|
+
assay: assayName,
|
|
35
|
+
cohort: cohortName
|
|
41
36
|
};
|
|
42
37
|
const tw = {
|
|
43
38
|
$id: "_",
|
|
@@ -214,16 +209,14 @@ async function validate_query_proteome(ds) {
|
|
|
214
209
|
}
|
|
215
210
|
for (const assayName in q.assays) {
|
|
216
211
|
const assay = q.assays[assayName];
|
|
217
|
-
if (
|
|
218
|
-
if (
|
|
212
|
+
if (assay.columnIdx == null) throw `queries.proteome.assays.${assayName}.columnIdx missing`;
|
|
213
|
+
if (assay.columnValue == null) throw `queries.proteome.assays.${assayName}.columnValue missing`;
|
|
219
214
|
if (assay.cohorts) {
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
if (!cohort.cohortName) throw `Missing cohortName in queries.proteome.assays.${assayName}.cohorts`;
|
|
215
|
+
for (const cohortName in assay.cohorts) {
|
|
216
|
+
const cohort = assay.cohorts[cohortName];
|
|
223
217
|
if (!cohort.controlFilter)
|
|
224
|
-
throw `Missing controlFilter in queries.proteome.assays.${assayName}.cohorts.${
|
|
225
|
-
if (!cohort.caseFilter)
|
|
226
|
-
throw `Missing caseFilter in queries.proteome.assays.${assayName}.cohorts.${cohort.cohortName}`;
|
|
218
|
+
throw `Missing controlFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
|
|
219
|
+
if (!cohort.caseFilter) throw `Missing caseFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
|
|
227
220
|
}
|
|
228
221
|
} else {
|
|
229
222
|
throw `Invalid assay structure for "${assayName}". Must have .cohorts`;
|
|
@@ -232,15 +225,56 @@ async function validate_query_proteome(ds) {
|
|
|
232
225
|
q.find = async (arg) => {
|
|
233
226
|
const proteins = arg?.proteins;
|
|
234
227
|
if (!Array.isArray(proteins) || proteins.length == 0) throw "queries.proteome.find arg.proteins[] missing";
|
|
235
|
-
|
|
228
|
+
const matches = /* @__PURE__ */ new Set();
|
|
229
|
+
const details = arg?.proteomeDetails || {};
|
|
230
|
+
const assay = details.assay;
|
|
231
|
+
const cohort = details.cohort;
|
|
232
|
+
const MAX_FIND_RESULTS = 500;
|
|
233
|
+
const filters = [];
|
|
234
|
+
if (Object.keys(details).length) {
|
|
235
|
+
if (!assay || !cohort) throw "queries.proteome.find arg.proteomeDetails.{assay,cohort} missing";
|
|
236
|
+
const assayConfig = q.assays?.[assay];
|
|
237
|
+
if (!assayConfig) throw `queries.proteome.find invalid assay: ${assay}`;
|
|
238
|
+
const cohortConfig = assayConfig?.cohorts?.[cohort];
|
|
239
|
+
if (!cohortConfig) throw `queries.proteome.find invalid cohort: ${cohort}`;
|
|
240
|
+
const assayFilter = [{ columnIdx: assayConfig.columnIdx, columnValue: assayConfig.columnValue }];
|
|
241
|
+
const cohortFilter = (Array.isArray(cohortConfig.caseFilter) ? cohortConfig.caseFilter : []).filter(
|
|
242
|
+
(filter) => !!filter
|
|
243
|
+
);
|
|
244
|
+
if (!cohortFilter.length) throw `queries.proteome.find invalid cohort caseFilter: ${cohort}`;
|
|
245
|
+
filters.push(...assayFilter, ...cohortFilter);
|
|
246
|
+
}
|
|
247
|
+
for (const p of proteins) {
|
|
248
|
+
if (!p) continue;
|
|
249
|
+
const token = String(p).trim();
|
|
250
|
+
if (token.length < 2) continue;
|
|
251
|
+
const upperToken = `${token}\uFFFF`;
|
|
252
|
+
const rawRows = [];
|
|
253
|
+
if (filters?.length) {
|
|
254
|
+
const { conditions, params } = buildFilterClause(filters);
|
|
255
|
+
const sql = `SELECT DISTINCT gene, identifier FROM proteome_abundance WHERE gene >= ? COLLATE NOCASE AND gene < ? COLLATE NOCASE AND ${conditions.join(
|
|
256
|
+
" AND "
|
|
257
|
+
)} LIMIT ${MAX_FIND_RESULTS}`;
|
|
258
|
+
rawRows.push(...q.db.prepare(sql).all(token, upperToken, ...params));
|
|
259
|
+
} else {
|
|
260
|
+
rawRows.push(
|
|
261
|
+
...q.db.prepare(
|
|
262
|
+
`SELECT DISTINCT gene, identifier FROM proteome_abundance WHERE gene >= ? COLLATE NOCASE AND gene < ? COLLATE NOCASE LIMIT ${MAX_FIND_RESULTS}`
|
|
263
|
+
).all(token, upperToken)
|
|
264
|
+
);
|
|
265
|
+
}
|
|
266
|
+
for (const row of rawRows) {
|
|
267
|
+
if (!row?.gene || !row?.identifier) continue;
|
|
268
|
+
matches.add(`${row.gene}: ${row.identifier}`);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
return [...matches];
|
|
236
272
|
};
|
|
237
273
|
q.get = async (param) => {
|
|
238
274
|
if (!param?.terms?.length) throw "queries.proteome.get param.terms[] missing";
|
|
239
|
-
if (!param.proteomeDetails?.
|
|
240
|
-
throw "queries.proteome.get param.proteomeDetails.{
|
|
241
|
-
|
|
242
|
-
throw "queries.proteome.get param.proteomeDetails.{cohortControlFilter, cohortCaseFilter, assayColumnIdx, assayColumnValue} missing";
|
|
243
|
-
return await getProteomeValuesFromCohort(ds, param);
|
|
275
|
+
if (!param.proteomeDetails?.assay || !param.proteomeDetails?.cohort)
|
|
276
|
+
throw "queries.proteome.get param.proteomeDetails.{assay,cohort} missing";
|
|
277
|
+
return await getProteomeValuesFromCohort(ds, param, q);
|
|
244
278
|
};
|
|
245
279
|
}
|
|
246
280
|
const columnIdxToName = {
|
|
@@ -267,32 +301,26 @@ function buildFilterClause(filters) {
|
|
|
267
301
|
}
|
|
268
302
|
return { conditions, params };
|
|
269
303
|
}
|
|
270
|
-
function findProteinsInCohort(db, proteins) {
|
|
271
|
-
const matches = [];
|
|
272
|
-
for (const p of proteins) {
|
|
273
|
-
if (!p) continue;
|
|
274
|
-
const rows = db.prepare("SELECT DISTINCT gene, identifier FROM proteome_abundance WHERE gene LIKE ? COLLATE NOCASE").all(`%${p}%`);
|
|
275
|
-
for (const row of rows) {
|
|
276
|
-
if (row.gene.toLowerCase().includes(p.toLowerCase())) {
|
|
277
|
-
matches.push(`${row.gene}: ${row.identifier}`);
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
return matches;
|
|
282
|
-
}
|
|
283
304
|
function queryDbRows(db, matchColumn, matchValue, filters) {
|
|
284
|
-
console.log(`Querying DB for ${matchColumn}=${matchValue} with filters:`, filters);
|
|
285
305
|
const { conditions, params } = buildFilterClause(filters);
|
|
286
306
|
const allConditions = [`${matchColumn} = ? COLLATE NOCASE`, ...conditions];
|
|
287
|
-
const sql = `SELECT identifier, protein_accession, modsite, gene, sample, value
|
|
307
|
+
const sql = `SELECT identifier, protein_accession, isoform, modsite, gene, sample, value
|
|
288
308
|
FROM proteome_abundance
|
|
289
309
|
WHERE ${allConditions.join(" AND ")}`;
|
|
290
|
-
console.log("Executing SQL:", sql);
|
|
291
310
|
return db.prepare(sql).all(matchValue, ...params);
|
|
292
311
|
}
|
|
293
|
-
async function getProteomeValuesFromCohort(ds, param) {
|
|
312
|
+
async function getProteomeValuesFromCohort(ds, param, q) {
|
|
294
313
|
const db = ds.queries.proteome.db;
|
|
295
|
-
const {
|
|
314
|
+
const { assay, cohort } = param.proteomeDetails;
|
|
315
|
+
const assayConfig = q.assays?.[assay];
|
|
316
|
+
if (!assayConfig) throw `queries.proteome.get invalid assay: ${assay}`;
|
|
317
|
+
const PTMType = q.assays[assay].PTMType;
|
|
318
|
+
const assayColumnIdx = assayConfig.columnIdx;
|
|
319
|
+
const assayColumnValue = assayConfig.columnValue;
|
|
320
|
+
const cohortConfig = assayConfig?.cohorts?.[cohort];
|
|
321
|
+
if (!cohortConfig) throw `queries.proteome.get invalid cohort: ${cohort}`;
|
|
322
|
+
const cohortControlFilter = cohortConfig.controlFilter;
|
|
323
|
+
const cohortCaseFilter = cohortConfig.caseFilter;
|
|
296
324
|
const assayFilter = [{ columnIdx: assayColumnIdx, columnValue: assayColumnValue }];
|
|
297
325
|
const term2sample2value = /* @__PURE__ */ new Map();
|
|
298
326
|
const allEntries = [];
|
|
@@ -336,11 +364,13 @@ async function getProteomeValuesFromCohort(ds, param) {
|
|
|
336
364
|
if (!entryMap.has(row.identifier)) {
|
|
337
365
|
entryMap.set(row.identifier, {
|
|
338
366
|
uniqueIdentifier: row.identifier,
|
|
339
|
-
assayName,
|
|
340
|
-
cohortName,
|
|
367
|
+
assayName: assay,
|
|
368
|
+
cohortName: cohort,
|
|
341
369
|
PTMType,
|
|
342
370
|
modSites: PTMType ? row.modsite || void 0 : void 0,
|
|
343
371
|
proteinAccession: row.protein_accession,
|
|
372
|
+
isoform: row.isoform,
|
|
373
|
+
// refSeq transcript ID mapped from protein_accession
|
|
344
374
|
geneName: row.gene,
|
|
345
375
|
s2v: {}
|
|
346
376
|
});
|