@sjcrh/proteinpaint-server 2.178.0 → 2.179.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.178.0",
3
+ "version": "2.179.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -62,11 +62,11 @@
62
62
  },
63
63
  "dependencies": {
64
64
  "@sjcrh/augen": "2.143.0",
65
- "@sjcrh/proteinpaint-python": "2.178.0",
65
+ "@sjcrh/proteinpaint-python": "2.179.0",
66
66
  "@sjcrh/proteinpaint-r": "2.178.0",
67
- "@sjcrh/proteinpaint-rust": "2.178.0",
68
- "@sjcrh/proteinpaint-shared": "2.178.0",
69
- "@sjcrh/proteinpaint-types": "2.178.0",
67
+ "@sjcrh/proteinpaint-rust": "2.179.0",
68
+ "@sjcrh/proteinpaint-shared": "2.179.0",
69
+ "@sjcrh/proteinpaint-types": "2.179.0",
70
70
  "@types/express": "^5.0.0",
71
71
  "@types/express-session": "^1.18.1",
72
72
  "better-sqlite3": "^12.4.1",
@@ -22,14 +22,14 @@ const api = {
22
22
  function init({ genomes }) {
23
23
  return async (req, res) => {
24
24
  const q = req.query;
25
- const genome = genomes[q.genome];
26
- if (!genome) throw new Error("invalid genome name");
27
- const ds = genome.datasets?.[q.dslabel];
28
- if (!ds) throw new Error("invalid ds");
29
- const terms = [q.tw];
30
- if (q.overlayTw) terms.push(q.overlayTw);
31
- if (q.divideTw) terms.push(q.divideTw);
32
25
  try {
26
+ const genome = genomes[q.genome];
27
+ if (!genome) throw new Error("invalid genome name");
28
+ const ds = genome.datasets?.[q.dslabel];
29
+ if (!ds) throw new Error("invalid dslabel");
30
+ const terms = [q.tw];
31
+ if (q.overlayTw) terms.push(q.overlayTw);
32
+ if (q.divideTw) terms.push(q.divideTw);
33
33
  const data = await getData(
34
34
  { filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__, __abortSignal: q.__abortSignal },
35
35
  ds
@@ -7,6 +7,7 @@ import { extract_DE_search_terms_from_query } from "./chat/DEagent.ts";
7
7
  import { extract_summary_terms } from "./chat/summaryagent.ts";
8
8
  import { extract_matrix_search_terms_from_query } from "./chat/matrixagent.ts";
9
9
  import { extract_samplescatter_terms_from_query } from "./chat/samplescatteragent.ts";
10
+ import { extract_hiercluster_terms_from_query } from "./chat/hierclusteragent.ts";
10
11
  import { parse_dataset_db, parse_geneset_db, getGenesetNames } from "./chat/utils.ts";
11
12
  import serverconfig from "../src/serverconfig.js";
12
13
  import { mayLog } from "#src/helpers.ts";
@@ -50,7 +51,6 @@ function init({ genomes }) {
50
51
  const ai_output_json = await run_chat_pipeline(
51
52
  q.prompt,
52
53
  llm,
53
- serverconfig.aiRoute,
54
54
  dataset_json,
55
55
  testing,
56
56
  dataset_db,
@@ -65,7 +65,7 @@ function init({ genomes }) {
65
65
  }
66
66
  };
67
67
  }
68
- async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testing, dataset_db, genedb, ds, genesetNames = []) {
68
+ async function run_chat_pipeline(user_prompt, llm, dataset_json, testing, dataset_db, genedb, ds, genesetNames = []) {
69
69
  const time1 = (/* @__PURE__ */ new Date()).valueOf();
70
70
  const class_response = await classifyQuery(user_prompt, llm);
71
71
  let ai_output_json;
@@ -84,9 +84,8 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
84
84
  }
85
85
  } else if (class_response.type == "plot") {
86
86
  const classResult = await classifyPlotType(user_prompt, llm);
87
- mayLog("classResult:", classResult);
88
87
  const dataset_db_output = await parse_dataset_db(dataset_db);
89
- const genes_list = dataset_json.hasGeneExpression ? await parse_geneset_db(genedb) : [];
88
+ const genes_list = await parse_geneset_db(genedb);
90
89
  if (classResult == "summary") {
91
90
  const time12 = (/* @__PURE__ */ new Date()).valueOf();
92
91
  ai_output_json = await extract_summary_terms(
@@ -139,6 +138,19 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
139
138
  genesetNames
140
139
  );
141
140
  mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
141
+ } else if (classResult == "hiercluster") {
142
+ const time12 = (/* @__PURE__ */ new Date()).valueOf();
143
+ ai_output_json = await extract_hiercluster_terms_from_query(
144
+ user_prompt,
145
+ llm,
146
+ dataset_db_output,
147
+ dataset_json,
148
+ genes_list,
149
+ ds,
150
+ testing,
151
+ genesetNames
152
+ );
153
+ mayLog("Time taken for hierCluster agent:", formatElapsedTime(Date.now() - time12));
142
154
  } else {
143
155
  ai_output_json = { type: "text", text: "Unknown classification value" };
144
156
  }
@@ -1,7 +1,16 @@
1
1
  import serverconfig from "#src/serverconfig.js";
2
2
  import { authApi } from "#src/auth.js";
3
3
  import { get_ds_tdb } from "#src/termdb.js";
4
- import { TermTypes } from "#shared/terms.js";
4
+ import {
5
+ TermTypeGroups,
6
+ SINGLECELL_CELLTYPE,
7
+ GENE_EXPRESSION,
8
+ METABOLITE_INTENSITY,
9
+ WHOLE_PROTEOME_ABUNDANCE,
10
+ SINGLECELL_GENE_EXPRESSION,
11
+ DNA_METHYLATION,
12
+ SSGSEA
13
+ } from "#shared/terms.js";
5
14
  const api = {
6
15
  endpoint: "termdb/config",
7
16
  methods: {
@@ -255,7 +264,8 @@ function addNonDictionaryQueries(c, ds, genome) {
255
264
  q2.singleCell.DEgenes = { termId: q.singleCell.DEgenes.termId };
256
265
  }
257
266
  if (q.singleCell?.terms?.length) {
258
- c.scctTerms = q.singleCell.terms;
267
+ if (!c.termType2terms) c.termType2terms = {};
268
+ c.termType2terms[TermTypeGroups.SINGLECELL_CELLTYPE] = q.singleCell.terms;
259
269
  }
260
270
  }
261
271
  if (q.images) {
@@ -270,14 +280,14 @@ function getAllowedTermTypes(ds) {
270
280
  if (ds.cohort.termdb.allowedTermTypes) {
271
281
  for (const t of ds.cohort.termdb.allowedTermTypes) typeSet.add(t);
272
282
  }
273
- if (ds.queries?.geneExpression) typeSet.add(TermTypes.GENE_EXPRESSION);
274
- if (ds.queries?.metaboliteIntensity) typeSet.add(TermTypes.METABOLITE_INTENSITY);
275
- if (ds.queries?.proteome?.whole) typeSet.add(TermTypes.WHOLE_PROTEOME_ABUNDANCE);
276
- if (ds.queries?.ssGSEA) typeSet.add(TermTypes.SSGSEA);
277
- if (ds.queries?.dnaMethylation) typeSet.add(TermTypes.DNA_METHYLATION);
283
+ if (ds.queries?.geneExpression) typeSet.add(GENE_EXPRESSION);
284
+ if (ds.queries?.metaboliteIntensity) typeSet.add(METABOLITE_INTENSITY);
285
+ if (ds.queries?.proteome?.whole) typeSet.add(WHOLE_PROTEOME_ABUNDANCE);
286
+ if (ds.queries?.ssGSEA) typeSet.add(SSGSEA);
287
+ if (ds.queries?.dnaMethylation) typeSet.add(DNA_METHYLATION);
278
288
  if (ds.queries?.singleCell) {
279
- typeSet.add(TermTypes.SINGLECELL_CELLTYPE);
280
- if (ds.queries.singleCell?.geneExpression) typeSet.add(TermTypes.SINGLECELL_GENE_EXPRESSION);
289
+ typeSet.add(SINGLECELL_CELLTYPE);
290
+ if (ds.queries.singleCell?.geneExpression) typeSet.add(SINGLECELL_GENE_EXPRESSION);
281
291
  }
282
292
  if (ds.cohort.termdb.termCollections?.length) typeSet.add("termCollection");
283
293
  return [...typeSet];
@@ -1,6 +1,8 @@
1
1
  import { TermdbDmrPayload } from "#types/checkers";
2
- import { run_R } from "@sjcrh/proteinpaint-r";
2
+ import { run_python } from "@sjcrh/proteinpaint-python";
3
3
  import { invalidcoord } from "#shared/common.js";
4
+ import { mayLog } from "#src/helpers.ts";
5
+ import { formatElapsedTime } from "#shared";
4
6
  const api = {
5
7
  endpoint: "termdb/dmr",
6
8
  methods: {
@@ -22,24 +24,33 @@ function init({ genomes }) {
22
24
  if (!genome) throw new Error("invalid genome");
23
25
  const ds = genome.datasets?.[q.dslabel];
24
26
  if (!ds) throw new Error("invalid ds");
25
- if (!ds.queries?.dnaMethylation) throw new Error("not supported");
27
+ if (!ds.queries?.dnaMethylation) throw new Error("analysis not supported");
26
28
  if (!Array.isArray(q.group1) || q.group1.length == 0) throw new Error("group1 not non empty array");
27
29
  if (!Array.isArray(q.group2) || q.group2.length == 0) throw new Error("group2 not non empty array");
28
30
  if (invalidcoord(genome, q.chr, q.start, q.stop)) throw new Error("invalid chr/start/stop");
29
- const arg = {
30
- group1: q.group1,
31
- group2: q.group2,
32
- file: ds.queries.dnaMethylation.file,
33
- // todo change file to mValueFile
31
+ const group1 = q.group1.map((s) => s.sample).filter(Boolean);
32
+ const group2 = q.group2.map((s) => s.sample).filter(Boolean);
33
+ if (group1.length < 3) throw new Error(`Need at least 3 samples in group1, got ${group1.length}`);
34
+ if (group2.length < 3) throw new Error(`Need at least 3 samples in group2, got ${group2.length}`);
35
+ const gpdmInput = {
36
+ h5file: ds.queries.dnaMethylation.file,
34
37
  chr: q.chr,
35
38
  start: q.start,
36
- stop: q.stop
39
+ stop: q.stop,
40
+ group1,
41
+ group2,
42
+ annotations: q.annotations || [],
43
+ nan_threshold: q.nan_threshold ?? 0.5
37
44
  };
38
- const result = JSON.parse(await run_R("dmr.R", JSON.stringify(arg)));
45
+ const time1 = Date.now();
46
+ const result = JSON.parse(await run_python("gpdm_analysis.py", JSON.stringify(gpdmInput)));
47
+ mayLog("DMR analysis time:", formatElapsedTime(Date.now() - time1));
39
48
  if (result.error) throw new Error(result.error);
40
- res.send(result);
49
+ res.send({ status: "ok", dmrs: result.dmrs });
41
50
  } catch (e) {
42
- res.send({ error: e.message || e });
51
+ const msg = e instanceof Error ? e.message : String(e);
52
+ res.send({ error: msg });
53
+ if (e instanceof Error && e.stack) console.log(e);
43
54
  }
44
55
  };
45
56
  }
@@ -7,6 +7,7 @@ import { authApi } from "../src/auth.js";
7
7
  import { run_R } from "@sjcrh/proteinpaint-r";
8
8
  import { read_file } from "../src/utils.js";
9
9
  import { getDescrStats } from "#routes/termdb.descrstats.ts";
10
+ import { SINGLECELL_GENE_EXPRESSION, isSingleCellTerm } from "#shared/terms.js";
10
11
  const api = {
11
12
  endpoint: "termdb/sampleScatter",
12
13
  methods: {
@@ -23,15 +24,15 @@ const api = {
23
24
  const refColor = "#F5F5DC";
24
25
  function init({ genomes }) {
25
26
  return async function(req, res) {
26
- const q = req.query;
27
- if (!q.genome || !q.dslabel) {
28
- throw new Error("Genome and dataset label are required for termdb/sampleScatter request.");
29
- }
30
- const g = genomes[q.genome];
31
- const ds = g.datasets[q.dslabel];
32
- if (q.singleCellPlot)
33
- return getSingleCellScatter(req, res, ds);
34
27
  try {
28
+ const q = req.query;
29
+ if (!q.genome || !q.dslabel) {
30
+ throw new Error("Genome and dataset label are required for termdb/sampleScatter request.");
31
+ }
32
+ const g = genomes[q.genome];
33
+ const ds = g.datasets[q.dslabel];
34
+ if (q.singleCellPlot)
35
+ return getSingleCellScatter(req, res, ds);
35
36
  let refSamples = [], cohortSamples;
36
37
  const terms = [];
37
38
  if (q.colorTW) terms.push(q.colorTW);
@@ -121,12 +122,11 @@ async function getSingleCellScatter(req, res, ds) {
121
122
  const { name, sample } = q.singleCellPlot;
122
123
  try {
123
124
  const tw = q.colorTW;
125
+ if (!tw || !isSingleCellTerm(tw.term))
126
+ throw new Error("colorTW must be provided and be a single cell term for single cell scatter plot");
124
127
  const arg = { plots: [name], sample };
125
- if (tw) {
126
- if (tw.term.type == "singleCellGeneExpression") arg.gene = tw.term.gene;
127
- else if (tw.term.type == "singleCellCellType") arg.colorBy = tw.term.name;
128
- else throw new Error("unsupported tw");
129
- }
128
+ if (tw.term.type == SINGLECELL_GENE_EXPRESSION) arg.gene = tw.term.gene;
129
+ else arg.colorBy = tw.term.name;
130
130
  const data = await ds.queries.singleCell.data.get(arg);
131
131
  const plot = data.plots[0];
132
132
  const cells = [...plot.expCells, ...plot.noExpCells];
@@ -142,7 +142,8 @@ async function getSingleCellScatter(req, res, ds) {
142
142
  z: 0,
143
143
  category: cell.category,
144
144
  shape: "Ref",
145
- hidden
145
+ hidden,
146
+ geneExp: cell.geneExp
146
147
  };
147
148
  });
148
149
  const [xMin, xMax, yMin, yMax] = samples.reduce(
@@ -151,14 +152,16 @@ async function getSingleCellScatter(req, res, ds) {
151
152
  );
152
153
  const categories = new Set(samples.map((s) => s.category));
153
154
  const colorMap = {};
154
- const k2c = getColors(categories.size);
155
- for (const category of categories) {
156
- const color = k2c(category);
157
- colorMap[category] = {
158
- sampleCount: samples.filter((s) => s.category == category).length,
159
- color,
160
- key: category
161
- };
155
+ if (tw.term.type != SINGLECELL_GENE_EXPRESSION) {
156
+ const k2c = getColors(categories.size);
157
+ for (const category of categories) {
158
+ const color = k2c(category);
159
+ colorMap[category] = {
160
+ sampleCount: samples.filter((s) => s.category == category).length,
161
+ color,
162
+ key: category
163
+ };
164
+ }
162
165
  }
163
166
  const shapeLegend = [["Ref", { sampleCount: samples.length, shape: 0, key: "Ref" }]];
164
167
  const colorLegend = Object.entries(colorMap);
@@ -9,7 +9,7 @@ import { termdbSingleCellSamplesPayload } from "#types/checkers";
9
9
  import { validate_query_singleCell_DEgenes } from "./termdb.singlecellDEgenes.ts";
10
10
  import { gdc_validate_query_singleCell_data } from "#src/mds3.gdc.js";
11
11
  import ky from "ky";
12
- import { TermTypes } from "#shared/terms.js";
12
+ import { SINGLECELL_CELLTYPE } from "#shared/terms.js";
13
13
  const api = {
14
14
  endpoint: "termdb/singlecellSamples",
15
15
  methods: {
@@ -252,7 +252,7 @@ function colorColumn2terms(plots, ds) {
252
252
  /** TODO: possible term may apply to multiple plots.
253
253
  * May need to change to plots: [] */
254
254
  plot: plot.name,
255
- type: TermTypes.SINGLECELL_CELLTYPE,
255
+ type: SINGLECELL_CELLTYPE,
256
256
  groupsetting: {},
257
257
  values: baseValues.reduce((acc, v) => {
258
258
  const alias = c?.aliases?.[v];
@@ -70,7 +70,7 @@ function nativeValidateQuery(ds) {
70
70
  }
71
71
  } else {
72
72
  for (const i of gE.samples) {
73
- const n = ds.cohort.termdb.q.id2sampleName(i.id);
73
+ const n = ds.cohort.termdb.q.id2sampleName(i);
74
74
  if (!n) throw "sample id cannot convert to string name";
75
75
  samples.push(n);
76
76
  }
@@ -190,9 +190,10 @@ async function createCanvasImg(q, result, ds) {
190
190
  const chart = result.charts[k];
191
191
  const plot2Values = {};
192
192
  for (const plot of chart.plots) plot2Values[plot.label] = plot.values;
193
- const densities = await getDensities(plot2Values);
194
- let axisScale;
195
193
  const useLog = q.unit == "log";
194
+ const logBase = ds.cohort.termdb.logscaleBase2 ? 2 : 10;
195
+ const densities = await getDensities(plot2Values, useLog, logBase);
196
+ let axisScale;
196
197
  if (useLog) {
197
198
  axisScale = scaleLog().base(ds.cohort.termdb.logscaleBase2 ? 2 : 10).domain([result.min, result.max]).range(isH ? [0, q.svgw] : [q.svgw, 0]);
198
199
  } else {
@@ -241,8 +242,16 @@ async function getDensity(values) {
241
242
  const result = await getDensities({ plot: values });
242
243
  return result.plot;
243
244
  }
244
- async function getDensities(plot2Values) {
245
- const plot2Density = JSON.parse(await run_R("density.R", JSON.stringify({ plot2Values })));
245
+ async function getDensities(plot2Values, useLog = false, logBase = 10) {
246
+ let transformedPlot2Values = {};
247
+ if (useLog) {
248
+ for (const plot in plot2Values) {
249
+ transformedPlot2Values[plot] = plot2Values[plot].filter((v) => v > 0).map((v) => Math.log(v) / Math.log(logBase));
250
+ }
251
+ } else {
252
+ transformedPlot2Values = plot2Values;
253
+ }
254
+ const plot2Density = JSON.parse(await run_R("density.R", JSON.stringify({ plot2Values: transformedPlot2Values })));
246
255
  const densities = {};
247
256
  for (const plot in plot2Density) {
248
257
  const result = plot2Density[plot];
@@ -253,11 +262,12 @@ async function getDensities(plot2Values) {
253
262
  let xMax = -Infinity;
254
263
  for (const [i, x] of Object.entries(result.x)) {
255
264
  const density2 = result.y[i];
256
- xMin = Math.min(xMin, x);
257
- xMax = Math.max(xMax, x);
265
+ const x0 = useLog ? Math.pow(logBase, x) : x;
266
+ xMin = Math.min(xMin, x0);
267
+ xMax = Math.max(xMax, x0);
258
268
  densityMin = Math.min(densityMin, density2);
259
269
  densityMax = Math.max(densityMax, density2);
260
- bins.push({ x0: x, density: density2 });
270
+ bins.push({ x0, density: density2 });
261
271
  }
262
272
  bins.unshift({ x0: xMin, density: densityMin });
263
273
  bins.push({ x0: xMax, density: densityMin });