@sjcrh/proteinpaint-server 2.178.0 → 2.179.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/routes/termdb.boxplot.js +7 -7
- package/routes/termdb.chat2.js +16 -4
- package/routes/termdb.config.js +19 -9
- package/routes/termdb.dmr.js +22 -11
- package/routes/termdb.sampleScatter.js +25 -22
- package/routes/termdb.singlecellSamples.js +2 -2
- package/routes/termdb.topVariablyExpressedGenes.js +1 -1
- package/routes/termdb.violin.js +17 -7
- package/src/app.js +514 -199
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.178.0",
|
|
3
|
+
"version": "2.179.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -62,11 +62,11 @@
|
|
|
62
62
|
},
|
|
63
63
|
"dependencies": {
|
|
64
64
|
"@sjcrh/augen": "2.143.0",
|
|
65
|
-
"@sjcrh/proteinpaint-python": "2.
|
|
65
|
+
"@sjcrh/proteinpaint-python": "2.179.0",
|
|
66
66
|
"@sjcrh/proteinpaint-r": "2.178.0",
|
|
67
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
68
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
69
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
67
|
+
"@sjcrh/proteinpaint-rust": "2.179.0",
|
|
68
|
+
"@sjcrh/proteinpaint-shared": "2.179.0",
|
|
69
|
+
"@sjcrh/proteinpaint-types": "2.179.0",
|
|
70
70
|
"@types/express": "^5.0.0",
|
|
71
71
|
"@types/express-session": "^1.18.1",
|
|
72
72
|
"better-sqlite3": "^12.4.1",
|
package/routes/termdb.boxplot.js
CHANGED
|
@@ -22,14 +22,14 @@ const api = {
|
|
|
22
22
|
function init({ genomes }) {
|
|
23
23
|
return async (req, res) => {
|
|
24
24
|
const q = req.query;
|
|
25
|
-
const genome = genomes[q.genome];
|
|
26
|
-
if (!genome) throw new Error("invalid genome name");
|
|
27
|
-
const ds = genome.datasets?.[q.dslabel];
|
|
28
|
-
if (!ds) throw new Error("invalid ds");
|
|
29
|
-
const terms = [q.tw];
|
|
30
|
-
if (q.overlayTw) terms.push(q.overlayTw);
|
|
31
|
-
if (q.divideTw) terms.push(q.divideTw);
|
|
32
25
|
try {
|
|
26
|
+
const genome = genomes[q.genome];
|
|
27
|
+
if (!genome) throw new Error("invalid genome name");
|
|
28
|
+
const ds = genome.datasets?.[q.dslabel];
|
|
29
|
+
if (!ds) throw new Error("invalid dslabel");
|
|
30
|
+
const terms = [q.tw];
|
|
31
|
+
if (q.overlayTw) terms.push(q.overlayTw);
|
|
32
|
+
if (q.divideTw) terms.push(q.divideTw);
|
|
33
33
|
const data = await getData(
|
|
34
34
|
{ filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__, __abortSignal: q.__abortSignal },
|
|
35
35
|
ds
|
package/routes/termdb.chat2.js
CHANGED
|
@@ -7,6 +7,7 @@ import { extract_DE_search_terms_from_query } from "./chat/DEagent.ts";
|
|
|
7
7
|
import { extract_summary_terms } from "./chat/summaryagent.ts";
|
|
8
8
|
import { extract_matrix_search_terms_from_query } from "./chat/matrixagent.ts";
|
|
9
9
|
import { extract_samplescatter_terms_from_query } from "./chat/samplescatteragent.ts";
|
|
10
|
+
import { extract_hiercluster_terms_from_query } from "./chat/hierclusteragent.ts";
|
|
10
11
|
import { parse_dataset_db, parse_geneset_db, getGenesetNames } from "./chat/utils.ts";
|
|
11
12
|
import serverconfig from "../src/serverconfig.js";
|
|
12
13
|
import { mayLog } from "#src/helpers.ts";
|
|
@@ -50,7 +51,6 @@ function init({ genomes }) {
|
|
|
50
51
|
const ai_output_json = await run_chat_pipeline(
|
|
51
52
|
q.prompt,
|
|
52
53
|
llm,
|
|
53
|
-
serverconfig.aiRoute,
|
|
54
54
|
dataset_json,
|
|
55
55
|
testing,
|
|
56
56
|
dataset_db,
|
|
@@ -65,7 +65,7 @@ function init({ genomes }) {
|
|
|
65
65
|
}
|
|
66
66
|
};
|
|
67
67
|
}
|
|
68
|
-
async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testing, dataset_db, genedb, ds, genesetNames = []) {
|
|
68
|
+
async function run_chat_pipeline(user_prompt, llm, dataset_json, testing, dataset_db, genedb, ds, genesetNames = []) {
|
|
69
69
|
const time1 = (/* @__PURE__ */ new Date()).valueOf();
|
|
70
70
|
const class_response = await classifyQuery(user_prompt, llm);
|
|
71
71
|
let ai_output_json;
|
|
@@ -84,9 +84,8 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
84
84
|
}
|
|
85
85
|
} else if (class_response.type == "plot") {
|
|
86
86
|
const classResult = await classifyPlotType(user_prompt, llm);
|
|
87
|
-
mayLog("classResult:", classResult);
|
|
88
87
|
const dataset_db_output = await parse_dataset_db(dataset_db);
|
|
89
|
-
const genes_list =
|
|
88
|
+
const genes_list = await parse_geneset_db(genedb);
|
|
90
89
|
if (classResult == "summary") {
|
|
91
90
|
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
92
91
|
ai_output_json = await extract_summary_terms(
|
|
@@ -139,6 +138,19 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
139
138
|
genesetNames
|
|
140
139
|
);
|
|
141
140
|
mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
|
|
141
|
+
} else if (classResult == "hiercluster") {
|
|
142
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
143
|
+
ai_output_json = await extract_hiercluster_terms_from_query(
|
|
144
|
+
user_prompt,
|
|
145
|
+
llm,
|
|
146
|
+
dataset_db_output,
|
|
147
|
+
dataset_json,
|
|
148
|
+
genes_list,
|
|
149
|
+
ds,
|
|
150
|
+
testing,
|
|
151
|
+
genesetNames
|
|
152
|
+
);
|
|
153
|
+
mayLog("Time taken for hierCluster agent:", formatElapsedTime(Date.now() - time12));
|
|
142
154
|
} else {
|
|
143
155
|
ai_output_json = { type: "text", text: "Unknown classification value" };
|
|
144
156
|
}
|
package/routes/termdb.config.js
CHANGED
|
@@ -1,7 +1,16 @@
|
|
|
1
1
|
import serverconfig from "#src/serverconfig.js";
|
|
2
2
|
import { authApi } from "#src/auth.js";
|
|
3
3
|
import { get_ds_tdb } from "#src/termdb.js";
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
TermTypeGroups,
|
|
6
|
+
SINGLECELL_CELLTYPE,
|
|
7
|
+
GENE_EXPRESSION,
|
|
8
|
+
METABOLITE_INTENSITY,
|
|
9
|
+
WHOLE_PROTEOME_ABUNDANCE,
|
|
10
|
+
SINGLECELL_GENE_EXPRESSION,
|
|
11
|
+
DNA_METHYLATION,
|
|
12
|
+
SSGSEA
|
|
13
|
+
} from "#shared/terms.js";
|
|
5
14
|
const api = {
|
|
6
15
|
endpoint: "termdb/config",
|
|
7
16
|
methods: {
|
|
@@ -255,7 +264,8 @@ function addNonDictionaryQueries(c, ds, genome) {
|
|
|
255
264
|
q2.singleCell.DEgenes = { termId: q.singleCell.DEgenes.termId };
|
|
256
265
|
}
|
|
257
266
|
if (q.singleCell?.terms?.length) {
|
|
258
|
-
c.
|
|
267
|
+
if (!c.termType2terms) c.termType2terms = {};
|
|
268
|
+
c.termType2terms[TermTypeGroups.SINGLECELL_CELLTYPE] = q.singleCell.terms;
|
|
259
269
|
}
|
|
260
270
|
}
|
|
261
271
|
if (q.images) {
|
|
@@ -270,14 +280,14 @@ function getAllowedTermTypes(ds) {
|
|
|
270
280
|
if (ds.cohort.termdb.allowedTermTypes) {
|
|
271
281
|
for (const t of ds.cohort.termdb.allowedTermTypes) typeSet.add(t);
|
|
272
282
|
}
|
|
273
|
-
if (ds.queries?.geneExpression) typeSet.add(
|
|
274
|
-
if (ds.queries?.metaboliteIntensity) typeSet.add(
|
|
275
|
-
if (ds.queries?.proteome?.whole) typeSet.add(
|
|
276
|
-
if (ds.queries?.ssGSEA) typeSet.add(
|
|
277
|
-
if (ds.queries?.dnaMethylation) typeSet.add(
|
|
283
|
+
if (ds.queries?.geneExpression) typeSet.add(GENE_EXPRESSION);
|
|
284
|
+
if (ds.queries?.metaboliteIntensity) typeSet.add(METABOLITE_INTENSITY);
|
|
285
|
+
if (ds.queries?.proteome?.whole) typeSet.add(WHOLE_PROTEOME_ABUNDANCE);
|
|
286
|
+
if (ds.queries?.ssGSEA) typeSet.add(SSGSEA);
|
|
287
|
+
if (ds.queries?.dnaMethylation) typeSet.add(DNA_METHYLATION);
|
|
278
288
|
if (ds.queries?.singleCell) {
|
|
279
|
-
typeSet.add(
|
|
280
|
-
if (ds.queries.singleCell?.geneExpression) typeSet.add(
|
|
289
|
+
typeSet.add(SINGLECELL_CELLTYPE);
|
|
290
|
+
if (ds.queries.singleCell?.geneExpression) typeSet.add(SINGLECELL_GENE_EXPRESSION);
|
|
281
291
|
}
|
|
282
292
|
if (ds.cohort.termdb.termCollections?.length) typeSet.add("termCollection");
|
|
283
293
|
return [...typeSet];
|
package/routes/termdb.dmr.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { TermdbDmrPayload } from "#types/checkers";
|
|
2
|
-
import {
|
|
2
|
+
import { run_python } from "@sjcrh/proteinpaint-python";
|
|
3
3
|
import { invalidcoord } from "#shared/common.js";
|
|
4
|
+
import { mayLog } from "#src/helpers.ts";
|
|
5
|
+
import { formatElapsedTime } from "#shared";
|
|
4
6
|
const api = {
|
|
5
7
|
endpoint: "termdb/dmr",
|
|
6
8
|
methods: {
|
|
@@ -22,24 +24,33 @@ function init({ genomes }) {
|
|
|
22
24
|
if (!genome) throw new Error("invalid genome");
|
|
23
25
|
const ds = genome.datasets?.[q.dslabel];
|
|
24
26
|
if (!ds) throw new Error("invalid ds");
|
|
25
|
-
if (!ds.queries?.dnaMethylation) throw new Error("not supported");
|
|
27
|
+
if (!ds.queries?.dnaMethylation) throw new Error("analysis not supported");
|
|
26
28
|
if (!Array.isArray(q.group1) || q.group1.length == 0) throw new Error("group1 not non empty array");
|
|
27
29
|
if (!Array.isArray(q.group2) || q.group2.length == 0) throw new Error("group2 not non empty array");
|
|
28
30
|
if (invalidcoord(genome, q.chr, q.start, q.stop)) throw new Error("invalid chr/start/stop");
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
const group1 = q.group1.map((s) => s.sample).filter(Boolean);
|
|
32
|
+
const group2 = q.group2.map((s) => s.sample).filter(Boolean);
|
|
33
|
+
if (group1.length < 3) throw new Error(`Need at least 3 samples in group1, got ${group1.length}`);
|
|
34
|
+
if (group2.length < 3) throw new Error(`Need at least 3 samples in group2, got ${group2.length}`);
|
|
35
|
+
const gpdmInput = {
|
|
36
|
+
h5file: ds.queries.dnaMethylation.file,
|
|
34
37
|
chr: q.chr,
|
|
35
38
|
start: q.start,
|
|
36
|
-
stop: q.stop
|
|
39
|
+
stop: q.stop,
|
|
40
|
+
group1,
|
|
41
|
+
group2,
|
|
42
|
+
annotations: q.annotations || [],
|
|
43
|
+
nan_threshold: q.nan_threshold ?? 0.5
|
|
37
44
|
};
|
|
38
|
-
const
|
|
45
|
+
const time1 = Date.now();
|
|
46
|
+
const result = JSON.parse(await run_python("gpdm_analysis.py", JSON.stringify(gpdmInput)));
|
|
47
|
+
mayLog("DMR analysis time:", formatElapsedTime(Date.now() - time1));
|
|
39
48
|
if (result.error) throw new Error(result.error);
|
|
40
|
-
res.send(result);
|
|
49
|
+
res.send({ status: "ok", dmrs: result.dmrs });
|
|
41
50
|
} catch (e) {
|
|
42
|
-
|
|
51
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
52
|
+
res.send({ error: msg });
|
|
53
|
+
if (e instanceof Error && e.stack) console.log(e);
|
|
43
54
|
}
|
|
44
55
|
};
|
|
45
56
|
}
|
package/routes/termdb.sampleScatter.js
CHANGED
|
@@ -7,6 +7,7 @@ import { authApi } from "../src/auth.js";
|
|
|
7
7
|
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
8
8
|
import { read_file } from "../src/utils.js";
|
|
9
9
|
import { getDescrStats } from "#routes/termdb.descrstats.ts";
|
|
10
|
+
import { SINGLECELL_GENE_EXPRESSION, isSingleCellTerm } from "#shared/terms.js";
|
|
10
11
|
const api = {
|
|
11
12
|
endpoint: "termdb/sampleScatter",
|
|
12
13
|
methods: {
|
|
@@ -23,15 +24,15 @@ const api = {
|
|
|
23
24
|
const refColor = "#F5F5DC";
|
|
24
25
|
function init({ genomes }) {
|
|
25
26
|
return async function(req, res) {
|
|
26
|
-
const q = req.query;
|
|
27
|
-
if (!q.genome || !q.dslabel) {
|
|
28
|
-
throw new Error("Genome and dataset label are required for termdb/sampleScatter request.");
|
|
29
|
-
}
|
|
30
|
-
const g = genomes[q.genome];
|
|
31
|
-
const ds = g.datasets[q.dslabel];
|
|
32
|
-
if (q.singleCellPlot)
|
|
33
|
-
return getSingleCellScatter(req, res, ds);
|
|
34
27
|
try {
|
|
28
|
+
const q = req.query;
|
|
29
|
+
if (!q.genome || !q.dslabel) {
|
|
30
|
+
throw new Error("Genome and dataset label are required for termdb/sampleScatter request.");
|
|
31
|
+
}
|
|
32
|
+
const g = genomes[q.genome];
|
|
33
|
+
const ds = g.datasets[q.dslabel];
|
|
34
|
+
if (q.singleCellPlot)
|
|
35
|
+
return getSingleCellScatter(req, res, ds);
|
|
35
36
|
let refSamples = [], cohortSamples;
|
|
36
37
|
const terms = [];
|
|
37
38
|
if (q.colorTW) terms.push(q.colorTW);
|
|
@@ -121,12 +122,11 @@ async function getSingleCellScatter(req, res, ds) {
|
|
|
121
122
|
const { name, sample } = q.singleCellPlot;
|
|
122
123
|
try {
|
|
123
124
|
const tw = q.colorTW;
|
|
125
|
+
if (!tw || !isSingleCellTerm(tw.term))
|
|
126
|
+
throw new Error("colorTW must be provided and be a single cell term for single cell scatter plot");
|
|
124
127
|
const arg = { plots: [name], sample };
|
|
125
|
-
if (tw)
|
|
126
|
-
|
|
127
|
-
else if (tw.term.type == "singleCellCellType") arg.colorBy = tw.term.name;
|
|
128
|
-
else throw new Error("unsupported tw");
|
|
129
|
-
}
|
|
128
|
+
if (tw.term.type == SINGLECELL_GENE_EXPRESSION) arg.gene = tw.term.gene;
|
|
129
|
+
else arg.colorBy = tw.term.name;
|
|
130
130
|
const data = await ds.queries.singleCell.data.get(arg);
|
|
131
131
|
const plot = data.plots[0];
|
|
132
132
|
const cells = [...plot.expCells, ...plot.noExpCells];
|
|
@@ -142,7 +142,8 @@ async function getSingleCellScatter(req, res, ds) {
|
|
|
142
142
|
z: 0,
|
|
143
143
|
category: cell.category,
|
|
144
144
|
shape: "Ref",
|
|
145
|
-
hidden
|
|
145
|
+
hidden,
|
|
146
|
+
geneExp: cell.geneExp
|
|
146
147
|
};
|
|
147
148
|
});
|
|
148
149
|
const [xMin, xMax, yMin, yMax] = samples.reduce(
|
|
@@ -151,14 +152,16 @@ async function getSingleCellScatter(req, res, ds) {
|
|
|
151
152
|
);
|
|
152
153
|
const categories = new Set(samples.map((s) => s.category));
|
|
153
154
|
const colorMap = {};
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
const
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
155
|
+
if (tw.term.type != SINGLECELL_GENE_EXPRESSION) {
|
|
156
|
+
const k2c = getColors(categories.size);
|
|
157
|
+
for (const category of categories) {
|
|
158
|
+
const color = k2c(category);
|
|
159
|
+
colorMap[category] = {
|
|
160
|
+
sampleCount: samples.filter((s) => s.category == category).length,
|
|
161
|
+
color,
|
|
162
|
+
key: category
|
|
163
|
+
};
|
|
164
|
+
}
|
|
162
165
|
}
|
|
163
166
|
const shapeLegend = [["Ref", { sampleCount: samples.length, shape: 0, key: "Ref" }]];
|
|
164
167
|
const colorLegend = Object.entries(colorMap);
|
package/routes/termdb.singlecellSamples.js
CHANGED
|
@@ -9,7 +9,7 @@ import { termdbSingleCellSamplesPayload } from "#types/checkers";
|
|
|
9
9
|
import { validate_query_singleCell_DEgenes } from "./termdb.singlecellDEgenes.ts";
|
|
10
10
|
import { gdc_validate_query_singleCell_data } from "#src/mds3.gdc.js";
|
|
11
11
|
import ky from "ky";
|
|
12
|
-
import {
|
|
12
|
+
import { SINGLECELL_CELLTYPE } from "#shared/terms.js";
|
|
13
13
|
const api = {
|
|
14
14
|
endpoint: "termdb/singlecellSamples",
|
|
15
15
|
methods: {
|
|
@@ -252,7 +252,7 @@ function colorColumn2terms(plots, ds) {
|
|
|
252
252
|
/** TODO: possible term may apply to multiple plots.
|
|
253
253
|
* May need to change to plots: [] */
|
|
254
254
|
plot: plot.name,
|
|
255
|
-
type:
|
|
255
|
+
type: SINGLECELL_CELLTYPE,
|
|
256
256
|
groupsetting: {},
|
|
257
257
|
values: baseValues.reduce((acc, v) => {
|
|
258
258
|
const alias = c?.aliases?.[v];
|
package/routes/termdb.topVariablyExpressedGenes.js
CHANGED
|
@@ -70,7 +70,7 @@ function nativeValidateQuery(ds) {
|
|
|
70
70
|
}
|
|
71
71
|
} else {
|
|
72
72
|
for (const i of gE.samples) {
|
|
73
|
-
const n = ds.cohort.termdb.q.id2sampleName(i
|
|
73
|
+
const n = ds.cohort.termdb.q.id2sampleName(i);
|
|
74
74
|
if (!n) throw "sample id cannot convert to string name";
|
|
75
75
|
samples.push(n);
|
|
76
76
|
}
|
package/routes/termdb.violin.js
CHANGED
|
@@ -190,9 +190,10 @@ async function createCanvasImg(q, result, ds) {
|
|
|
190
190
|
const chart = result.charts[k];
|
|
191
191
|
const plot2Values = {};
|
|
192
192
|
for (const plot of chart.plots) plot2Values[plot.label] = plot.values;
|
|
193
|
-
const densities = await getDensities(plot2Values);
|
|
194
|
-
let axisScale;
|
|
195
193
|
const useLog = q.unit == "log";
|
|
194
|
+
const logBase = ds.cohort.termdb.logscaleBase2 ? 2 : 10;
|
|
195
|
+
const densities = await getDensities(plot2Values, useLog, logBase);
|
|
196
|
+
let axisScale;
|
|
196
197
|
if (useLog) {
|
|
197
198
|
axisScale = scaleLog().base(ds.cohort.termdb.logscaleBase2 ? 2 : 10).domain([result.min, result.max]).range(isH ? [0, q.svgw] : [q.svgw, 0]);
|
|
198
199
|
} else {
|
|
@@ -241,8 +242,16 @@ async function getDensity(values) {
|
|
|
241
242
|
const result = await getDensities({ plot: values });
|
|
242
243
|
return result.plot;
|
|
243
244
|
}
|
|
244
|
-
async function getDensities(plot2Values) {
|
|
245
|
-
|
|
245
|
+
async function getDensities(plot2Values, useLog = false, logBase = 10) {
|
|
246
|
+
let transformedPlot2Values = {};
|
|
247
|
+
if (useLog) {
|
|
248
|
+
for (const plot in plot2Values) {
|
|
249
|
+
transformedPlot2Values[plot] = plot2Values[plot].filter((v) => v > 0).map((v) => Math.log(v) / Math.log(logBase));
|
|
250
|
+
}
|
|
251
|
+
} else {
|
|
252
|
+
transformedPlot2Values = plot2Values;
|
|
253
|
+
}
|
|
254
|
+
const plot2Density = JSON.parse(await run_R("density.R", JSON.stringify({ plot2Values: transformedPlot2Values })));
|
|
246
255
|
const densities = {};
|
|
247
256
|
for (const plot in plot2Density) {
|
|
248
257
|
const result = plot2Density[plot];
|
|
@@ -253,11 +262,12 @@ async function getDensities(plot2Values) {
|
|
|
253
262
|
let xMax = -Infinity;
|
|
254
263
|
for (const [i, x] of Object.entries(result.x)) {
|
|
255
264
|
const density2 = result.y[i];
|
|
256
|
-
|
|
257
|
-
|
|
265
|
+
const x0 = useLog ? Math.pow(logBase, x) : x;
|
|
266
|
+
xMin = Math.min(xMin, x0);
|
|
267
|
+
xMax = Math.max(xMax, x0);
|
|
258
268
|
densityMin = Math.min(densityMin, density2);
|
|
259
269
|
densityMax = Math.max(densityMax, density2);
|
|
260
|
-
bins.push({ x0
|
|
270
|
+
bins.push({ x0, density: density2 });
|
|
261
271
|
}
|
|
262
272
|
bins.unshift({ x0: xMin, density: densityMin });
|
|
263
273
|
bins.push({ x0: xMax, density: densityMin });
|