@sjcrh/proteinpaint-server 2.179.0 → 2.180.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -417,7 +417,7 @@ function termdb_test_default() {
417
417
  colorColumns: [
418
418
  {
419
419
  index: 3,
420
- name: "CellType"
420
+ name: "Cell Type"
421
421
  }
422
422
  ],
423
423
  coordsColumns: { x: 1, y: 2 },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.179.0",
3
+ "version": "2.180.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -64,9 +64,9 @@
64
64
  "@sjcrh/augen": "2.143.0",
65
65
  "@sjcrh/proteinpaint-python": "2.179.0",
66
66
  "@sjcrh/proteinpaint-r": "2.178.0",
67
- "@sjcrh/proteinpaint-rust": "2.179.0",
68
- "@sjcrh/proteinpaint-shared": "2.179.0",
69
- "@sjcrh/proteinpaint-types": "2.179.0",
67
+ "@sjcrh/proteinpaint-rust": "2.180.0",
68
+ "@sjcrh/proteinpaint-shared": "2.180.0",
69
+ "@sjcrh/proteinpaint-types": "2.180.0",
70
70
  "@types/express": "^5.0.0",
71
71
  "@types/express-session": "^1.18.1",
72
72
  "better-sqlite3": "^12.4.1",
package/routes/gdc.maf.js CHANGED
@@ -26,7 +26,8 @@ function init({ genomes }) {
26
26
  if (!g) throw "hg38 missing";
27
27
  const ds = g.datasets.GDC;
28
28
  if (!ds) throw "hg38 GDC missing";
29
- const payload = await listMafFiles(req.query, ds);
29
+ const q = req.query;
30
+ const payload = await listMafFiles(q, ds);
30
31
  res.send(payload);
31
32
  } catch (e) {
32
33
  res.send({ status: "error", error: e.message || e });
@@ -48,7 +49,7 @@ async function listMafFiles(q, ds) {
48
49
  if (q.filter0) {
49
50
  case_filters.content.push(q.filter0);
50
51
  }
51
- const { host } = ds.getHostHeaders(q);
52
+ const { host, headers } = ds.getHostHeaders(q);
52
53
  const body = {
53
54
  filters,
54
55
  size: maxFileNumber,
@@ -66,7 +67,7 @@ async function listMafFiles(q, ds) {
66
67
  ].join(",")
67
68
  };
68
69
  if (case_filters.content.length) body.case_filters = case_filters;
69
- const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, json: body });
70
+ const response = await ky.post(joinUrl(host.rest, "files"), { headers, timeout: false, json: body });
70
71
  if (!response.ok) throw `HTTP Error: ${response.status} ${response.statusText}`;
71
72
  const re = await response.json();
72
73
  if (!Number.isInteger(re.data?.pagination?.total)) throw "re.data.pagination.total is not int";
@@ -20,11 +20,11 @@ const api = {
20
20
  function init({ genomes }) {
21
21
  return async (req, res) => {
22
22
  try {
23
- const q = req.query;
24
23
  const g = genomes.hg38;
25
24
  if (!g) throw "hg38 missing";
26
25
  const ds = g.datasets.GDC;
27
26
  if (!ds) throw "hg38 GDC missing";
27
+ const q = req.query;
28
28
  await buildMaf(q, res, ds);
29
29
  } catch (e) {
30
30
  if (e.stack) console.log(e.stack);
@@ -34,14 +34,15 @@ function init({ genomes }) {
34
34
  }
35
35
  async function buildMaf(q, res, ds) {
36
36
  const t0 = Date.now();
37
- const { host } = ds.getHostHeaders(q);
38
- const fileLst2 = await getFileLstUnderSizeLimit(q.fileIdLst, host);
37
+ const { host, headers } = ds.getHostHeaders(q);
38
+ const fileLst2 = await getFileLstUnderSizeLimit(q.fileIdLst, host, headers);
39
39
  mayLog(`${fileLst2.length} out of ${q.fileIdLst.length} input MAF files accepted by size limit`, Date.now() - t0);
40
40
  const arg = {
41
41
  fileIdLst: fileLst2,
42
42
  columns: q.columns,
43
- host: joinUrl(host.rest, "data")
43
+ host: joinUrl(host.rest, "data"),
44
44
  // must use the /data/ endpoint from current host
45
+ headers
45
46
  };
46
47
  const boundary = "------------------------GDC-MAF-BUILD";
47
48
  res.setHeader("Content-Type", `multipart/form-data; boundary=${boundary}`);
@@ -97,7 +98,7 @@ async function buildMaf(q, res, ds) {
97
98
  if (end) res.end();
98
99
  }
99
100
  }
100
- async function getFileLstUnderSizeLimit(lst, host) {
101
+ async function getFileLstUnderSizeLimit(lst, host, headers) {
101
102
  if (lst.length == 0) throw "fileIdLst[] not array or blank";
102
103
  const body = {
103
104
  filters: {
@@ -107,7 +108,7 @@ async function getFileLstUnderSizeLimit(lst, host) {
107
108
  size: 1e4,
108
109
  fields: "file_size"
109
110
  };
110
- const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, json: body });
111
+ const response = await ky.post(joinUrl(host.rest, "files"), { headers, timeout: false, json: body });
111
112
  if (!response.ok) throw `HTTP Error: ${response.status} ${response.statusText}`;
112
113
  const re = await response.json();
113
114
  if (!Array.isArray(re.data?.hits)) throw "re.data.hits[] not array";
@@ -48,11 +48,23 @@ async function trigger_getcategories(q, res, tdb, ds) {
48
48
  const data = await getData(arg, ds);
49
49
  if (data.error) throw data.error;
50
50
  const [lst, orderedLabels] = getCategories(data, q, ds, $id);
51
+ const allowedValues = tdb.getRestrictedValues?.(q.__protected__?.clientAuthResult, q.tw.term.id);
52
+ const filtered = filterCategoriesByAllowedValues(lst, orderedLabels, allowedValues);
51
53
  res.send({
52
- lst,
53
- orderedLabels
54
+ lst: filtered.lst,
55
+ orderedLabels: filtered.orderedLabels
54
56
  });
55
57
  }
58
/**
 * Restrict a category list, and the ordered labels that go with it, to an allow-list of keys.
 *
 * @param {Array<{key: *, label: *}>} lst - category entries; an entry is kept when
 *   `String(entry.key)` is present in the allow-list.
 * @param {Array|undefined|null} orderedLabels - labels in display order. When it is an
 *   array, only labels belonging to a kept entry survive; any other value is returned as-is.
 * @param {Iterable|undefined|null} allowedValues - permitted keys (array, Set, or any
 *   iterable). A falsy value means no restriction is applied.
 * @returns {{lst: Array, orderedLabels: *}} the (possibly filtered) list and labels.
 * @throws {TypeError} when `allowedValues` is truthy but not iterable.
 */
function filterCategoriesByAllowedValues(lst, orderedLabels, allowedValues) {
  // No allow-list supplied: hand back both inputs untouched (same object identities).
  if (!allowedValues) {
    return { lst, orderedLabels };
  }
  // Spread instead of calling .map directly so a Set (or other iterable) works as well
  // as an array. Keys are compared as strings so 1 and "1" are treated as the same key.
  const allowedValuesSet = new Set([...allowedValues].map((v) => String(v)));
  const filteredLst = lst.filter((item) => allowedValuesSet.has(String(item.key)));
  // Nothing to filter when the caller did not provide an array of labels;
  // avoid throwing on undefined/null and pass the value through.
  if (!Array.isArray(orderedLabels)) {
    return { lst: filteredLst, orderedLabels };
  }
  // Keep only the labels that still have a surviving entry, preserving their order.
  const allowedLabels = new Set(filteredLst.map((item) => item.label));
  const filteredOrderedLabels = orderedLabels.filter((label) => allowedLabels.has(label));
  return { lst: filteredLst, orderedLabels: filteredOrderedLabels };
}
56
68
  function getCategories(data, q, ds, $id) {
57
69
  const lst = [];
58
70
  if (q.tw.term.type == "geneVariant" && q.tw.q.type != "predefined-groupset" && q.tw.q.type != "custom-groupset") {
@@ -133,5 +145,6 @@ function getCategories(data, q, ds, $id) {
133
145
  }
134
146
  export {
135
147
  api,
148
+ filterCategoriesByAllowedValues,
136
149
  getCategories
137
150
  };
@@ -4,11 +4,13 @@ import { classifyNotPlot } from "./chat/classify2.ts";
4
4
  import { classifyPlotType } from "./chat/plot.ts";
5
5
  import { readJSONFile } from "./chat/utils.ts";
6
6
  import { extract_DE_search_terms_from_query } from "./chat/DEagent.ts";
7
+ import { determineAmbiguousGenePrompt } from "./chat/ambiguousgeneagent.ts";
7
8
  import { extract_summary_terms } from "./chat/summaryagent.ts";
8
9
  import { extract_matrix_search_terms_from_query } from "./chat/matrixagent.ts";
9
10
  import { extract_samplescatter_terms_from_query } from "./chat/samplescatteragent.ts";
10
11
  import { extract_hiercluster_terms_from_query } from "./chat/hierclusteragent.ts";
11
- import { parse_dataset_db, parse_geneset_db, getGenesetNames } from "./chat/utils.ts";
12
+ import { classifyGeneDataType } from "./chat/genedatatypeagent.ts";
13
+ import { extractGenesFromPrompt, parse_dataset_db, parse_geneset_db, getGenesetNames } from "./chat/utils.ts";
12
14
  import serverconfig from "../src/serverconfig.js";
13
15
  import { mayLog } from "#src/helpers.ts";
14
16
  import { formatElapsedTime } from "#shared";
@@ -33,8 +35,7 @@ function init({ genomes }) {
33
35
  if (!g) throw "invalid genome";
34
36
  const ds = g.datasets?.[q.dslabel];
35
37
  if (!ds) throw "invalid dslabel";
36
- const serverconfig_ds_entries = serverconfig.genomes.find((genome) => genome.name == q.genome).datasets.find((dslabel) => dslabel.name == ds.label);
37
- if (!serverconfig_ds_entries.aifiles) {
38
+ if (!ds?.queries?.chat?.aifiles) {
38
39
  throw "aifiles are missing for chatbot to work";
39
40
  }
40
41
  const llm = serverconfig.llm;
@@ -44,8 +45,7 @@ function init({ genomes }) {
44
45
  }
45
46
  const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
46
47
  const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
47
- const aiFilesPath = serverconfig_ds_entries.aifiles;
48
- const dataset_json = await readJSONFile(aiFilesPath);
48
+ const dataset_json = await readJSONFile(ds?.queries?.chat?.aifiles);
49
49
  const testing = false;
50
50
  const genesetNames = getGenesetNames(g);
51
51
  const ai_output_json = await run_chat_pipeline(
@@ -83,9 +83,40 @@ async function run_chat_pipeline(user_prompt, llm, dataset_json, testing, datase
83
83
  };
84
84
  }
85
85
  } else if (class_response.type == "plot") {
86
+ let geneFeatures = [];
87
+ const genes_list = await parse_geneset_db(genedb);
88
+ const relevant_genes = extractGenesFromPrompt(user_prompt, genes_list);
89
+ if (relevant_genes.length > 0) {
90
+ const AmbiguousGeneMessage = determineAmbiguousGenePrompt(user_prompt, relevant_genes, dataset_json);
91
+ if (AmbiguousGeneMessage.length > 0) {
92
+ return {
93
+ type: "text",
94
+ text: AmbiguousGeneMessage
95
+ };
96
+ }
97
+ const geneDataTypeMessage = await classifyGeneDataType(
98
+ user_prompt,
99
+ llm,
100
+ relevant_genes,
101
+ dataset_json
102
+ );
103
+ if (typeof geneDataTypeMessage === "string" || geneDataTypeMessage instanceof String) {
104
+ if (geneDataTypeMessage.length > 0) {
105
+ return {
106
+ type: "text",
107
+ text: geneDataTypeMessage
108
+ };
109
+ } else {
110
+ throw "classifyGeneDataType agent returned an empty string, which is unexpected.";
111
+ }
112
+ } else if (Array.isArray(geneDataTypeMessage)) {
113
+ geneFeatures = geneDataTypeMessage;
114
+ } else {
115
+ throw "geneDataTypeMessage has unknown data type returned from classifyGeneDataType agent";
116
+ }
117
+ }
86
118
  const classResult = await classifyPlotType(user_prompt, llm);
87
119
  const dataset_db_output = await parse_dataset_db(dataset_db);
88
- const genes_list = await parse_geneset_db(genedb);
89
120
  if (classResult == "summary") {
90
121
  const time12 = (/* @__PURE__ */ new Date()).valueOf();
91
122
  ai_output_json = await extract_summary_terms(
@@ -93,10 +124,10 @@ async function run_chat_pipeline(user_prompt, llm, dataset_json, testing, datase
93
124
  llm,
94
125
  dataset_db_output,
95
126
  dataset_json,
96
- genes_list,
97
127
  ds,
98
128
  testing,
99
- genesetNames
129
+ genesetNames,
130
+ geneFeatures
100
131
  );
101
132
  mayLog("Time taken for summary agent:", formatElapsedTime(Date.now() - time12));
102
133
  } else if (classResult == "dge") {
@@ -119,10 +150,10 @@ async function run_chat_pipeline(user_prompt, llm, dataset_json, testing, datase
119
150
  llm,
120
151
  dataset_db_output,
121
152
  dataset_json,
122
- genes_list,
123
153
  ds,
124
154
  testing,
125
- genesetNames
155
+ genesetNames,
156
+ geneFeatures
126
157
  );
127
158
  mayLog("Time taken for matrix agent:", formatElapsedTime(Date.now() - time12));
128
159
  } else if (classResult == "samplescatter") {
@@ -132,10 +163,10 @@ async function run_chat_pipeline(user_prompt, llm, dataset_json, testing, datase
132
163
  llm,
133
164
  dataset_db_output,
134
165
  dataset_json,
135
- genes_list,
136
166
  ds,
137
167
  testing,
138
- genesetNames
168
+ genesetNames,
169
+ geneFeatures
139
170
  );
140
171
  mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
141
172
  } else if (classResult == "hiercluster") {
@@ -145,12 +176,17 @@ async function run_chat_pipeline(user_prompt, llm, dataset_json, testing, datase
145
176
  llm,
146
177
  dataset_db_output,
147
178
  dataset_json,
148
- genes_list,
149
179
  ds,
150
180
  testing,
151
- genesetNames
181
+ genesetNames,
182
+ geneFeatures
152
183
  );
153
184
  mayLog("Time taken for hierCluster agent:", formatElapsedTime(Date.now() - time12));
185
+ } else if (classResult == "lollipop") {
186
+ ai_output_json = {
187
+ type: "text",
188
+ text: "This is a gene mutation prompt. But, lollipop agent has not been implemented yet"
189
+ };
154
190
  } else {
155
191
  ai_output_json = { type: "text", text: "Unknown classification value" };
156
192
  }
@@ -10,7 +10,6 @@ import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
10
10
  import { TermTypes, NUMERIC_DICTIONARY_TERM } from "#shared/terms.js";
11
11
  import { getData } from "#src/termdb.matrix.js";
12
12
  import { termType2label } from "#shared/terms.js";
13
- import { mayLog } from "#src/helpers.ts";
14
13
  import { formatElapsedTime } from "#shared/time.js";
15
14
  const api = {
16
15
  endpoint: "termdb/cluster",
@@ -109,7 +108,7 @@ async function getResult(q, ds) {
109
108
  }
110
109
  const t = Date.now();
111
110
  const clustering = await doClustering(term2sample2value, q, Object.keys(bySampleId).length);
112
- mayLog("clustering done:", formatElapsedTime(Date.now() - t));
111
+ console.log("clustering done:", formatElapsedTime(Date.now() - t));
113
112
  const result = { clustering, byTermId, bySampleId };
114
113
  if (removedHierClusterTerms.length) result.removedHierClusterTerms = removedHierClusterTerms;
115
114
  return result;
@@ -292,7 +291,7 @@ async function validateNative(q, ds) {
292
291
  }
293
292
  const time1 = Date.now();
294
293
  const geneData = JSON.parse(await queryHDF5(q.file, geneNames));
295
- mayLog("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
294
+ console.log("Time taken to run gene query:", formatElapsedTime(Date.now() - time1));
296
295
  const genesData = geneData.query_output || {};
297
296
  if (!genesData) throw "No expression data returned from HDF5 query";
298
297
  for (const tw of param.terms) {
@@ -258,7 +258,9 @@ function addNonDictionaryQueries(c, ds, genome) {
258
258
  }
259
259
  };
260
260
  if (q.singleCell.geneExpression) {
261
- q2.singleCell.geneExpression = {};
261
+ q2.singleCell.geneExpression = {
262
+ unit: q.singleCell.geneExpression?.unit
263
+ };
262
264
  }
263
265
  if (q.singleCell.DEgenes) {
264
266
  q2.singleCell.DEgenes = { termId: q.singleCell.DEgenes.termId };
@@ -40,10 +40,20 @@ async function getFilters(query, ds) {
40
40
  );
41
41
  const tw2List = {};
42
42
  for (const tw of query.terms) {
43
- tw2List[tw.term.id] = getList(samplesPerFilter, filtersData, tw, query.showAll);
43
+ let values = getList(samplesPerFilter, filtersData, tw, query.showAll);
44
+ const allowedValues = ds.cohort.termdb.getRestrictedValues?.(query.__protected__.clientAuthResult, tw.term.id);
45
+ values = filterByAllowedValues(values, allowedValues);
46
+ tw2List[tw.term.id] = values;
44
47
  }
45
48
  return { ...tw2List };
46
49
  }
50
/**
 * Restrict a list of filter-value entries to an allow-list.
 *
 * @param {Array<{value: *}>} values - entries to filter; each is matched on `String(entry.value)`.
 * @param {Iterable|undefined|null} allowedValues - permitted values (array, Set, or any
 *   iterable). A falsy value means no restriction is applied.
 * @returns {Array} `values` itself when there is no allow-list, otherwise a new filtered array.
 * @throws {TypeError} when `allowedValues` is truthy but not iterable.
 */
function filterByAllowedValues(values, allowedValues) {
  // No allow-list supplied: return the input array as-is.
  if (!allowedValues) {
    return values;
  }
  // Spread instead of calling .map directly so a Set (or other iterable) works as well
  // as an array. Values are compared as strings so 1 and "1" match.
  const allowedValuesSet = new Set([...allowedValues].map((v) => String(v)));
  // An entry whose value is exactly "" is always kept, regardless of the allow-list
  // (presumably the blank/placeholder option — confirm against getList()).
  return values.filter((v) => v.value === "" || allowedValuesSet.has(String(v.value)));
}
47
57
  async function getSamplesPerFilter(q, ds) {
48
58
  q.ds = ds;
49
59
  const samples = {};
@@ -84,5 +94,6 @@ function getList(samplesPerFilter, filtersData, tw, showAll) {
84
94
  return filteredValues;
85
95
  }
86
96
  export {
87
- api
97
+ api,
98
+ filterByAllowedValues
88
99
  };
@@ -7,7 +7,7 @@ import { authApi } from "../src/auth.js";
7
7
  import { run_R } from "@sjcrh/proteinpaint-r";
8
8
  import { read_file } from "../src/utils.js";
9
9
  import { getDescrStats } from "#routes/termdb.descrstats.ts";
10
- import { SINGLECELL_GENE_EXPRESSION, isSingleCellTerm } from "#shared/terms.js";
10
+ import { isSingleCellTerm, SINGLECELL_GENE_EXPRESSION, SINGLECELL_CELLTYPE } from "#shared/terms.js";
11
11
  const api = {
12
12
  endpoint: "termdb/sampleScatter",
13
13
  methods: {
@@ -126,7 +126,8 @@ async function getSingleCellScatter(req, res, ds) {
126
126
  throw new Error("colorTW must be provided and be a single cell term for single cell scatter plot");
127
127
  const arg = { plots: [name], sample };
128
128
  if (tw.term.type == SINGLECELL_GENE_EXPRESSION) arg.gene = tw.term.gene;
129
- else arg.colorBy = tw.term.name;
129
+ else if (tw.term.type == SINGLECELL_CELLTYPE) arg.colorBy = tw.term.name;
130
+ else throw new Error(`unsupported single cell term type: ${tw.term.type}`);
130
131
  const data = await ds.queries.singleCell.data.get(arg);
131
132
  const plot = data.plots[0];
132
133
  const cells = [...plot.expCells, ...plot.noExpCells];
@@ -153,7 +154,11 @@ async function getSingleCellScatter(req, res, ds) {
153
154
  const categories = new Set(samples.map((s) => s.category));
154
155
  const colorMap = {};
155
156
  if (tw.term.type != SINGLECELL_GENE_EXPRESSION) {
156
- const k2c = getColors(categories.size);
157
+ const defaultK2c = getColors(categories.size);
158
+ const k2c = (category) => {
159
+ const dsTerm = ds.queries.singleCell?.terms ? ds.queries.singleCell.terms.find((t) => t.name == tw.term.name) : void 0;
160
+ return tw.term.values?.[category]?.color || dsTerm?.values?.[category]?.color || defaultK2c(category);
161
+ };
157
162
  for (const category of categories) {
158
163
  const color = k2c(category);
159
164
  colorMap[category] = {
@@ -223,9 +223,13 @@ function gdc_validateGeneExpression(G, ds, genome) {
223
223
  gene_ids: [gencodeId],
224
224
  file_id: hdf5id
225
225
  };
226
- const { host } = ds.getHostHeaders(q);
226
+ const { host, headers } = ds.getHostHeaders(q);
227
227
  const t = Date.now();
228
- const response = await ky.post(joinUrl(host.rest, "scrna_seq/gene_expression"), { timeout: false, json: body });
228
+ const response = await ky.post(joinUrl(host.rest, "scrna_seq/gene_expression"), {
229
+ timeout: false,
230
+ headers,
231
+ json: body
232
+ });
229
233
  if (!response.ok) throw new Error(`HTTP Error: ${response.status} ${response.statusText}`);
230
234
  const out = await response.json();
231
235
  mayLog("gdc scrna gene exp", q.gene, Date.now() - t);
@@ -6,7 +6,6 @@ import { makeFilter } from "#src/mds3.gdc.js";
6
6
  import { cachedFetch } from "#src/utils.js";
7
7
  import { joinUrl } from "#shared/joinUrl.js";
8
8
  import { formatElapsedTime } from "#shared/time.js";
9
- import { mayLog } from "#src/helpers.ts";
10
9
  const api = {
11
10
  endpoint: "termdb/topVariablyExpressedGenes",
12
11
  methods: {
@@ -34,7 +33,7 @@ function init({ genomes }) {
34
33
  result = {
35
34
  genes: await ds.queries.topVariablyExpressedGenes.getGenes(q)
36
35
  };
37
- mayLog("topVariablyExpressedGenes", formatElapsedTime(Date.now() - t));
36
+ console.log("compute top variably expressed genes:", formatElapsedTime(Date.now() - t));
38
37
  } catch (e) {
39
38
  result = { status: e.status || 400, error: e.message || e };
40
39
  }