@sjcrh/proteinpaint-server 2.177.1-0 → 2.178.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -210,18 +210,31 @@ function termdb_test_default() {
210
210
  defaultTw4correlationPlot: {
211
211
  disease: { id: "diaggrp", q: {} }
212
212
  },
213
- numericTermCollections: [
213
+ termCollections: [
214
214
  {
215
215
  name: "Fake Collection 1",
216
+ type: "numeric",
216
217
  termIds: ["agedx", "a_death", "a_ndi", "agelastvisit"],
217
218
  branchIds: ["Demographic Variables", "Age (years)"],
218
219
  propsByTermId: {}
219
220
  },
220
221
  {
221
222
  name: "Fake Collection 2",
223
+ type: "numeric",
222
224
  termIds: ["a_death", "a_ndi", "agelastvisit"],
223
225
  branchIds: ["Demographic Variables", "Age (years)"],
224
226
  propsByTermId: {}
227
+ },
228
+ {
229
+ name: "Assay Availability",
230
+ type: "categorical",
231
+ categoryKeys: [
232
+ { key: "1", shown: true },
233
+ { key: "2", shown: true }
234
+ ],
235
+ termIds: ["assayavailability_cnv", "assayavailability_fusion", "assayavailability_germline"],
236
+ branchIds: [""],
237
+ propsByTermId: {}
225
238
  }
226
239
  ]
227
240
  },
@@ -375,7 +388,11 @@ function termdb_test_default() {
375
388
  },
376
389
  dnaMethylation: {
377
390
  file: "files/hg38/TermdbTest/dnaMeth.h5",
378
- unit: "Average Beta Value"
391
+ unit: "Average Beta Value",
392
+ promoter: {
393
+ file: "files/hg38/TermdbTest/dnaMethPromoterMvalue.h5",
394
+ unit: "M-value"
395
+ }
379
396
  },
380
397
  topVariablyExpressedGenes: {
381
398
  src: "native"
@@ -421,7 +438,7 @@ function termdb_test_default() {
421
438
  jsonFile: "files/hg38/TermdbTest/trackLst/facet.json",
422
439
  activeTracks: ["bw 1", "bed 1"]
423
440
  },
424
- chat: {}
441
+ chat: { aifiles: "./proteinpaint/server/dataset/ai/termdb.test.json" }
425
442
  }
426
443
  };
427
444
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.177.1-0",
3
+ "version": "2.178.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -62,11 +62,11 @@
62
62
  },
63
63
  "dependencies": {
64
64
  "@sjcrh/augen": "2.143.0",
65
- "@sjcrh/proteinpaint-python": "2.177.1-0",
66
- "@sjcrh/proteinpaint-r": "2.152.1-0",
67
- "@sjcrh/proteinpaint-rust": "2.177.1-0",
68
- "@sjcrh/proteinpaint-shared": "2.177.1-0",
69
- "@sjcrh/proteinpaint-types": "2.177.1-0",
65
+ "@sjcrh/proteinpaint-python": "2.178.0",
66
+ "@sjcrh/proteinpaint-r": "2.178.0",
67
+ "@sjcrh/proteinpaint-rust": "2.178.0",
68
+ "@sjcrh/proteinpaint-shared": "2.178.0",
69
+ "@sjcrh/proteinpaint-types": "2.178.0",
70
70
  "@types/express": "^5.0.0",
71
71
  "@types/express-session": "^1.18.1",
72
72
  "better-sqlite3": "^12.4.1",
package/routes/grin2.js CHANGED
@@ -8,6 +8,7 @@ import os from "os";
8
8
  import { get_samples } from "#src/termdb.sql.js";
9
9
  import { read_file, file_is_readable } from "#src/utils.js";
10
10
  import { dtsnvindel, dtcnv, dtfusionrna, dtsv, dt2lesion, optionToDt, formatElapsedTime } from "#shared";
11
+ import { mayFilterByMaf } from "#src/mds3.init.js";
11
12
  import crypto from "crypto";
12
13
  import { promisify } from "node:util";
13
14
  import { exec as execCallback } from "node:child_process";
@@ -32,6 +33,7 @@ const api = {
32
33
  };
33
34
  function init({ genomes }) {
34
35
  return async (req, res) => {
36
+ const signal = req.query.__abortSignal;
35
37
  try {
36
38
  const request = req.query;
37
39
  const g = genomes[request.genome];
@@ -39,9 +41,13 @@ function init({ genomes }) {
39
41
  const ds = g.datasets?.[request.dslabel];
40
42
  if (!ds) throw new Error("ds missing");
41
43
  if (!ds.queries?.singleSampleMutation) throw new Error("singleSampleMutation query missing from dataset");
42
- const result = await runGrin2WithLimit(g, ds, request);
44
+ const result = await runGrin2WithLimit(g, ds, request, signal);
43
45
  res.json(result);
44
46
  } catch (e) {
47
+ if (signal?.aborted) {
48
+ mayLog("[GRIN2] Analysis aborted due to client disconnect");
49
+ return;
50
+ }
45
51
  console.error("[GRIN2] Error stack:", e.stack);
46
52
  const errorResponse = {
47
53
  status: "error",
@@ -93,7 +99,7 @@ async function getMaxLesions() {
93
99
  return MAX_LESIONS;
94
100
  }
95
101
  let activeGrin2Jobs = 0;
96
- async function runGrin2WithLimit(g, ds, request) {
102
+ async function runGrin2WithLimit(g, ds, request, signal) {
97
103
  if (activeGrin2Jobs >= GRIN2_CONCURRENCY_LIMIT) {
98
104
  const error = new Error(
99
105
  `GRIN2 analysis queue is full (${GRIN2_CONCURRENCY_LIMIT} concurrent analyses). Please try again in a few minutes.`
@@ -105,7 +111,7 @@ async function runGrin2WithLimit(g, ds, request) {
105
111
  activeGrin2Jobs++;
106
112
  mayLog(`[GRIN2] Starting analysis. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
107
113
  try {
108
- return await runGrin2(g, ds, request);
114
+ return await runGrin2(g, ds, request, signal);
109
115
  } finally {
110
116
  activeGrin2Jobs--;
111
117
  mayLog(`[GRIN2] Analysis complete. Active jobs: ${activeGrin2Jobs}/${GRIN2_CONCURRENCY_LIMIT}`);
@@ -136,7 +142,7 @@ function getCnvLesionType(isGain) {
136
142
  }
137
143
  return lesionType.lesionType;
138
144
  }
139
- async function runGrin2(g, ds, request) {
145
+ async function runGrin2(g, ds, request, signal) {
140
146
  const startTime = Date.now();
141
147
  const samples = await get_samples(
142
148
  request,
@@ -176,7 +182,7 @@ async function runGrin2(g, ds, request) {
176
182
  pyInput.chromosomelist[c] = g.majorchr[c];
177
183
  }
178
184
  const grin2AnalysisStart = Date.now();
179
- const pyResult = await run_python("grin2PpWrapper.py", JSON.stringify(pyInput));
185
+ const pyResult = await run_python("grin2PpWrapper.py", JSON.stringify(pyInput), { signal });
180
186
  if (pyResult.stderr?.trim()) {
181
187
  mayLog(`[GRIN2] Python stderr: ${pyResult.stderr}`);
182
188
  if (pyResult.stderr.includes("ERROR:")) {
@@ -201,7 +207,7 @@ async function runGrin2(g, ds, request) {
201
207
  bin_size: request.binSize
202
208
  };
203
209
  const manhattanPlotStart = Date.now();
204
- const rsResult = await run_rust("manhattan_plot", JSON.stringify(rustInput));
210
+ const rsResult = await run_rust("manhattan_plot", JSON.stringify(rustInput), [], { signal });
205
211
  const manhattanPlotTime = Date.now() - manhattanPlotStart;
206
212
  mayLog(`[GRIN2] Manhattan plot generation took ${formatElapsedTime(manhattanPlotTime)}`);
207
213
  const manhattanPlotData = JSON.parse(rsResult);
@@ -431,6 +437,19 @@ function filterAndConvertSnvIndel(sampleName, entry, options) {
431
437
  if (!Number.isInteger(entry.pos)) {
432
438
  return null;
433
439
  }
440
+ if (options.mafFilter?.lst?.length) {
441
+ if (!Array.isArray(entry.vafs)) return null;
442
+ const copy = { dt: dtsnvindel };
443
+ for (const v of entry.vafs) {
444
+ copy[v.id] = v.refCount + "," + v.altCount;
445
+ }
446
+ try {
447
+ if (!mayFilterByMaf(options.mafFilter, copy)) return null;
448
+ } catch (e) {
449
+ mayLog("mayFilterByMaf() crashed on a snvindel " + (e instanceof Error ? e.message : String(e)));
450
+ return null;
451
+ }
452
+ }
434
453
  const start = entry.pos;
435
454
  const end = entry.pos;
436
455
  return [sampleName, entry.chr, start, end, dt2lesion[dtsnvindel].lesionTypes[0].lesionType];
@@ -2,6 +2,7 @@ import fs from "fs";
2
2
  import { ezFetch } from "#shared";
3
3
  import { get_samples } from "#src/termdb.sql.js";
4
4
  import { ChatPayload } from "#types/checkers";
5
+ import { extractResourceResponse } from "./chat/resource.ts";
5
6
  import serverconfig from "../src/serverconfig.js";
6
7
  import { mayLog } from "#src/helpers.ts";
7
8
  import Database from "better-sqlite3";
@@ -143,8 +144,15 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
143
144
  );
144
145
  let ai_output_json;
145
146
  mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
146
- if (class_response.type == "html") {
147
- ai_output_json = class_response;
147
+ if (class_response.type == "none") {
148
+ ai_output_json = {
149
+ type: "text",
150
+ text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
151
+ };
152
+ } else if (class_response.type == "resource") {
153
+ const time12 = (/* @__PURE__ */ new Date()).valueOf();
154
+ ai_output_json = await extractResourceResponse(user_prompt, llm, dataset_json);
155
+ mayLog("Time taken for resource agent:", formatElapsedTime(Date.now() - time12));
148
156
  } else if (class_response.type == "plot") {
149
157
  const classResult = class_response.plot;
150
158
  mayLog("classResult:", classResult);
@@ -174,7 +182,7 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
174
182
  );
175
183
  mayLog("Time taken for DE agent:", formatElapsedTime(Date.now() - time12));
176
184
  } else if (classResult == "survival") {
177
- ai_output_json = { type: "html", html: "survival agent has not been implemented yet" };
185
+ ai_output_json = { type: "text", text: "survival agent has not been implemented yet" };
178
186
  } else if (classResult == "matrix") {
179
187
  const time12 = (/* @__PURE__ */ new Date()).valueOf();
180
188
  ai_output_json = await extract_matrix_search_terms_from_query(
@@ -200,13 +208,10 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
200
208
  );
201
209
  mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
202
210
  } else {
203
- ai_output_json = { type: "html", html: "Unknown classification value" };
211
+ ai_output_json = { type: "text", text: "Unknown classification value" };
204
212
  }
205
213
  } else {
206
- ai_output_json = {
207
- type: "html",
208
- html: "Unknown classification type"
209
- };
214
+ ai_output_json = { type: "text", text: "Unknown classification type" };
210
215
  }
211
216
  return ai_output_json;
212
217
  }
@@ -0,0 +1,153 @@
1
+ import { ChatPayload } from "#types/checkers";
2
+ import { classifyQuery } from "./chat/classify1.ts";
3
+ import { classifyNotPlot } from "./chat/classify2.ts";
4
+ import { classifyPlotType } from "./chat/plot.ts";
5
+ import { readJSONFile } from "./chat/utils.ts";
6
+ import { extract_DE_search_terms_from_query } from "./chat/DEagent.ts";
7
+ import { extract_summary_terms } from "./chat/summaryagent.ts";
8
+ import { extract_matrix_search_terms_from_query } from "./chat/matrixagent.ts";
9
+ import { extract_samplescatter_terms_from_query } from "./chat/samplescatteragent.ts";
10
+ import { parse_dataset_db, parse_geneset_db, getGenesetNames } from "./chat/utils.ts";
11
+ import serverconfig from "../src/serverconfig.js";
12
+ import { mayLog } from "#src/helpers.ts";
13
+ import { formatElapsedTime } from "#shared";
14
// Route descriptor for the `termdb/chat2` endpoint. GET and POST are registered
// with the same payload definition (`ChatPayload`) and the same handler factory
// (`init`); each verb gets its own object.
const api = {
  endpoint: "termdb/chat2",
  methods: Object.fromEntries(["get", "post"].map((verb) => [verb, { ...ChatPayload, init }]))
};
27
/**
 * Build the Express-style handler for the `termdb/chat2` endpoint.
 *
 * The handler validates the genome/dataset named in `req.query`, checks that
 * the dataset has an `aifiles` entry in serverconfig and that an LLM provider
 * is configured, then runs the chat pipeline on `req.query.prompt` and sends
 * the pipeline's output. Any failure is sent back as `{ error }`.
 *
 * Validation failures are thrown as plain strings: the catch block only logs
 * when the thrown value carries a `stack`, so these expected failures reach the
 * client without a stack trace being logged.
 *
 * @param {{ genomes: object }} arg - `genomes` is keyed by genome name.
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    const q = req.query;
    try {
      const g = genomes[q.genome];
      if (!g) throw "invalid genome";
      const ds = g.datasets?.[q.dslabel];
      if (!ds) throw "invalid dslabel";
      // Either lookup can miss (genome or dataset absent from serverconfig).
      // Optional chaining lets that fall through to the explicit "aifiles"
      // message below instead of an opaque TypeError.
      const serverconfig_ds_entries = serverconfig.genomes
        ?.find((genome) => genome.name == q.genome)
        ?.datasets?.find((dslabel) => dslabel.name == ds.label);
      if (!serverconfig_ds_entries?.aifiles) {
        throw "aifiles are missing for chatbot to work";
      }
      const llm = serverconfig.llm;
      if (!llm) throw "serverconfig.llm is not configured";
      if (llm.provider !== "SJ" && llm.provider !== "ollama") {
        throw "llm.provider must be 'SJ' or 'ollama'";
      }
      const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
      const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
      const aiFilesPath = serverconfig_ds_entries.aifiles;
      const dataset_json = await readJSONFile(aiFilesPath);
      const testing = false;
      const genesetNames = getGenesetNames(g);
      const ai_output_json = await run_chat_pipeline(
        q.prompt,
        llm,
        serverconfig.aiRoute,
        dataset_json,
        testing,
        dataset_db,
        genedb,
        ds,
        genesetNames
      );
      res.send(ai_output_json);
    } catch (e) {
      // `e` may be a string (validation failure) or even nullish; guard the read.
      if (e?.stack) mayLog(e.stack);
      res.send({ error: e?.message || e });
    }
  };
}
68
/**
 * Classify a user prompt and route it to the matching chat agent.
 *
 * Flow, as implemented below:
 *  1. `classifyQuery` decides between "notplot" and "plot".
 *  2. "notplot" prompts go through `classifyNotPlot`; an "html" result is
 *     returned as-is, anything else becomes a generic text reply.
 *  3. "plot" prompts go through `classifyPlotType`, then the dataset db (and,
 *     when `dataset_json.hasGeneExpression` is set, the gene db) is parsed
 *     before dispatching to the summary / dge / matrix / samplescatter agent.
 *  4. Any other classification yields a text reply saying it is unknown.
 *
 * @param {string} user_prompt - prompt text forwarded to every classifier/agent
 * @param {object} llm - LLM configuration forwarded to every classifier/agent
 * @param {*} aiRoute - accepted for signature compatibility; not used here
 * @param {object} dataset_json - dataset AI config; `hasGeneExpression` is read
 * @param {boolean} testing - forwarded to the agents
 * @param {string} dataset_db - path handed to `parse_dataset_db`
 * @param {string} genedb - path handed to `parse_geneset_db`
 * @param {object} ds - dataset object forwarded to the agents
 * @param {Array} [genesetNames=[]] - forwarded to summary/matrix/samplescatter agents
 * @returns {Promise<object>} the selected agent's output or a `{ type: "text", text }` reply
 */
async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testing, dataset_db, genedb, ds, genesetNames = []) {
  // Await a task and log how long it took under the given label.
  const timed = async (label, task) => {
    const startedAt = Date.now();
    const outcome = await task();
    mayLog(label, formatElapsedTime(Date.now() - startedAt));
    return outcome;
  };

  const classification = await timed("Time taken for classification:", () => classifyQuery(user_prompt, llm));

  if (classification.type == "notplot") {
    const secondPass = await timed("Time taken for classify2:", () =>
      classifyNotPlot(user_prompt, llm, dataset_json)
    );
    if (secondPass.type == "html") return secondPass;
    return {
      type: "text",
      text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
    };
  }

  if (classification.type != "plot") {
    return { type: "text", text: "Unknown classification type" };
  }

  const plotKind = await classifyPlotType(user_prompt, llm);
  mayLog("classResult:", plotKind);
  // Both lookups happen before dispatch, whichever agent ends up being used.
  const parsedDatasetDb = await parse_dataset_db(dataset_db);
  const geneList = dataset_json.hasGeneExpression ? await parse_geneset_db(genedb) : [];

  if (plotKind == "summary") {
    return await timed("Time taken for summary agent:", () =>
      extract_summary_terms(user_prompt, llm, parsedDatasetDb, dataset_json, geneList, ds, testing, genesetNames)
    );
  }
  if (plotKind == "dge") {
    return await timed("Time taken for DE agent:", () =>
      extract_DE_search_terms_from_query(user_prompt, llm, parsedDatasetDb, dataset_json, ds, testing)
    );
  }
  if (plotKind == "survival") {
    return { type: "text", text: "survival agent has not been implemented yet" };
  }
  if (plotKind == "matrix") {
    return await timed("Time taken for matrix agent:", () =>
      extract_matrix_search_terms_from_query(
        user_prompt,
        llm,
        parsedDatasetDb,
        dataset_json,
        geneList,
        ds,
        testing,
        genesetNames
      )
    );
  }
  if (plotKind == "samplescatter") {
    return await timed("Time taken for sampleScatter agent:", () =>
      extract_samplescatter_terms_from_query(
        user_prompt,
        llm,
        parsedDatasetDb,
        dataset_json,
        geneList,
        ds,
        testing,
        genesetNames
      )
    );
  }
  return { type: "text", text: "Unknown classification value" };
}
150
+ export {
151
+ api,
152
+ run_chat_pipeline
153
+ };
@@ -44,6 +44,13 @@ function init({ genomes }) {
44
44
  if (q.terms.length < 3)
45
45
  throw `A minimum of three genes is required for clustering. Please refresh this page to clear this error.`;
46
46
  result = await getResult(q, ds);
47
+ } else if (TermTypes.WHOLE_PROTEOME_ABUNDANCE == q.dataType) {
48
+ if (!ds.queries?.proteome?.whole) throw `no ${TermTypes.WHOLE_PROTEOME_ABUNDANCE} data on this dataset`;
49
+ if (!q.terms) throw `missing gene list`;
50
+ if (!Array.isArray(q.terms)) throw `gene list is not an array`;
51
+ if (q.terms.length < 3)
52
+ throw `A minimum of three genes is required for clustering. Please refresh this page to clear this error.`;
53
+ result = await getResult(q, ds);
47
54
  } else {
48
55
  throw "unknown q.dataType " + q.dataType;
49
56
  }
@@ -68,6 +75,9 @@ async function getResult(q, ds) {
68
75
  if (q.dataType == NUMERIC_DICTIONARY_TERM) {
69
76
  ;
70
77
  ({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
78
+ } else if (q.dataType == TermTypes.WHOLE_PROTEOME_ABUNDANCE) {
79
+ ;
80
+ ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.whole.get(_q, ds));
71
81
  } else {
72
82
  ;
73
83
  ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries[q.dataType].get(_q, ds));
@@ -72,7 +72,7 @@ function make(q, req, res, ds, genome) {
72
72
  if (tdb.survival) c.survival = tdb.survival;
73
73
  if (tdb.regression) c.regression = tdb.regression;
74
74
  if (tdb.uiLabels) c.uiLabels = tdb.uiLabels;
75
- if (tdb.numericTermCollections) c.numericTermCollections = tdb.numericTermCollections;
75
+ if (tdb.termCollections) c.termCollections = tdb.termCollections;
76
76
  if (ds.assayAvailability) c.assayAvailability = ds.assayAvailability;
77
77
  if (ds.cohort.correlationVolcano) c.correlationVolcano = ds.cohort.correlationVolcano;
78
78
  if (ds.cohort.boxplots) c.boxplots = ds.cohort.boxplots;
@@ -124,8 +124,8 @@ function addMatrixplots(c, ds) {
124
124
  });
125
125
  }
126
126
  function addMutationSignatureplots(c, ds) {
127
- const mutationSignatureplots = ds.cohort.termdb.numericTermCollections?.find(
128
- (ntc) => ntc.name == "Mutation Signature"
127
+ const mutationSignatureplots = ds.cohort.termdb.termCollections?.find(
128
+ (tc) => tc.name == "Mutation Signature" && tc.type === "numeric"
129
129
  )?.plots;
130
130
  if (!mutationSignatureplots) return;
131
131
  c.mutationSignatureplots = mutationSignatureplots.map((p) => {
@@ -191,6 +191,9 @@ function addNonDictionaryQueries(c, ds, genome) {
191
191
  }
192
192
  if (q.dnaMethylation) {
193
193
  q2.dnaMethylation = { unit: q.dnaMethylation.unit };
194
+ if (q.dnaMethylation.promoter) {
195
+ q2.dnaMethylation.promoter = { unit: q.dnaMethylation.promoter.unit };
196
+ }
194
197
  }
195
198
  if (q.ld) {
196
199
  q2.ld = structuredClone(q.ld);
@@ -251,6 +254,9 @@ function addNonDictionaryQueries(c, ds, genome) {
251
254
  if (q.singleCell.DEgenes) {
252
255
  q2.singleCell.DEgenes = { termId: q.singleCell.DEgenes.termId };
253
256
  }
257
+ if (q.singleCell?.terms?.length) {
258
+ c.scctTerms = q.singleCell.terms;
259
+ }
254
260
  }
255
261
  if (q.images) {
256
262
  q2.images = {};
@@ -266,9 +272,14 @@ function getAllowedTermTypes(ds) {
266
272
  }
267
273
  if (ds.queries?.geneExpression) typeSet.add(TermTypes.GENE_EXPRESSION);
268
274
  if (ds.queries?.metaboliteIntensity) typeSet.add(TermTypes.METABOLITE_INTENSITY);
275
+ if (ds.queries?.proteome?.whole) typeSet.add(TermTypes.WHOLE_PROTEOME_ABUNDANCE);
269
276
  if (ds.queries?.ssGSEA) typeSet.add(TermTypes.SSGSEA);
270
277
  if (ds.queries?.dnaMethylation) typeSet.add(TermTypes.DNA_METHYLATION);
271
- if (ds.cohort.termdb.numericTermCollections) typeSet.add("termCollection");
278
+ if (ds.queries?.singleCell) {
279
+ typeSet.add(TermTypes.SINGLECELL_CELLTYPE);
280
+ if (ds.queries.singleCell?.geneExpression) typeSet.add(TermTypes.SINGLECELL_GENE_EXPRESSION);
281
+ }
282
+ if (ds.cohort.termdb.termCollections?.length) typeSet.add("termCollection");
272
283
  return [...typeSet];
273
284
  }
274
285
  function getSelectCohort(ds, req) {
@@ -0,0 +1,180 @@
1
+ import { diffMethPayload } from "#types/checkers";
2
+ import { getData } from "../src/termdb.matrix.js";
3
+ import { get_ds_tdb } from "../src/termdb.js";
4
+ import { run_R } from "@sjcrh/proteinpaint-r";
5
+ import { mayLog } from "#src/helpers.ts";
6
+ import { formatElapsedTime } from "#shared";
7
// Route descriptor for the `termdb/diffMeth` endpoint. GET and POST are
// registered with the same payload definition (`diffMethPayload`) and the same
// handler factory (`init`); each verb gets its own object.
const api = {
  endpoint: "termdb/diffMeth",
  methods: Object.fromEntries(["get", "post"].map((verb) => [verb, { ...diffMethPayload, init }]))
};
20
/**
 * Build the request handler for the `termdb/diffMeth` endpoint.
 *
 * The handler resolves the genome and dataset, fetches annotations for the
 * optional confounder term wrappers `tw` and `tw2` (one after the other), runs
 * `run_diffMeth`, and sends its result. Errors are sent back as
 * `{ status: "error", error }`; Error instances are additionally printed.
 *
 * @param {{ genomes: object }} arg - `genomes` is keyed by genome name.
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    try {
      const q = req.query;
      const genome = genomes[q.genome];
      if (!genome) throw new Error("invalid genome");
      const [ds] = get_ds_tdb(genome, q);

      // Fetch annotation data for one confounder; an absent term wrapper
      // yields the same empty-array placeholder for that slot.
      const loadConfounder = async (tw) => {
        if (!tw) return [];
        const fetched = await getData({ filter: q.filter, filter0: q.filter0, terms: [tw] }, ds);
        if (fetched.error) throw new Error(fetched.error);
        return fetched;
      };
      const term_results = await loadConfounder(q.tw);
      const term_results2 = await loadConfounder(q.tw2);

      const results = await run_diffMeth(req.query, ds, term_results, term_results2);
      if (!(results && results.data)) throw new Error("No data available");
      res.send(results);
    } catch (e) {
      res.send({ status: "error", error: e.message || e });
      if (e instanceof Error && e.stack) console.log(e);
    }
  };
}
46
/**
 * Run promoter-level differential methylation between two sample groups.
 *
 * Samples are kept only if they have an integer `sampleId`, resolve to a
 * sample name, are present in the promoter query's `allSampleSet`, and have
 * an entry in the annotation results for every supplied confounder
 * (`param.tw`, `param.tw2`).
 *
 * Group 1 (`groups[0]`) is sent to R as `control`, group 2 (`groups[1]`) as
 * `case`.
 *
 * @param {object} param - request parameters; reads `samplelst.groups`, `tw`,
 *   `tw2`, `preAnalysis` and `min_samples_per_group`
 * @param {object} ds - dataset; reads `queries.dnaMethylation.promoter` and
 *   `cohort.termdb.q.id2sampleName`
 * @param {object} term_results - annotation data for `param.tw` (`.samples` is read only when `tw` is set)
 * @param {object} term_results2 - annotation data for `param.tw2` (`.samples` is read only when `tw2` is set)
 * @returns {Promise<object>} `{ data }` with per-group counts (and optional
 *   `alert`) when `param.preAnalysis` is set; otherwise
 *   `{ data, sample_size1, sample_size2 }` from the R result
 * @throws {Error} on malformed groups, missing promoter config, group
 *   validation alerts, or a confounder that has a single distinct value
 */
async function run_diffMeth(param, ds, term_results, term_results2) {
  const groups = param.samplelst?.groups;
  if (groups?.length != 2) throw new Error(".samplelst.groups.length!=2");
  // A plain `values?.length < 1` check is false when `values` is missing
  // (undefined < 1), which would let the request through and fail later while
  // iterating. Require a real, non-empty array up front.
  if (!Array.isArray(groups[0]?.values) || groups[0].values.length < 1)
    throw new Error("samplelst.groups[0].values.length<1");
  if (!Array.isArray(groups[1]?.values) || groups[1].values.length < 1)
    throw new Error("samplelst.groups[1].values.length<1");

  const q = ds.queries.dnaMethylation?.promoter;
  if (!q) throw new Error("ds.queries.dnaMethylation.promoter is not configured");
  if (!q.file) throw new Error("ds.queries.dnaMethylation.promoter.file is missing");

  const group1 = collectDiffMethGroup(groups[0].values, param, ds, q, term_results, term_results2);
  const group2 = collectDiffMethGroup(groups[1].values, param, ds, q, term_results, term_results2);

  const sample_size1 = group1.names.length;
  const sample_size2 = group2.names.length;
  const alerts = validateGroups(sample_size1, sample_size2, group1.names, group2.names);

  if (param.preAnalysis) {
    // Pre-analysis only reports usable sample counts; alerts are returned
    // alongside the counts rather than thrown.
    return {
      data: {
        [groups[0].name]: sample_size1,
        [groups[1].name]: sample_size2,
        ...(alerts.length ? { alert: alerts.join(" | ") } : {})
      }
    };
  }
  if (alerts.length) throw new Error(alerts.join(" | "));

  const diffMethInput = {
    case: group2.names.join(","),
    control: group1.names.join(","),
    input_file: q.file,
    min_samples_per_group: param.min_samples_per_group
  };
  // Confounder values are listed group 2 first, then group 1 — presumably to
  // line up with case-then-control sample order in the R script (TODO confirm).
  if (param.tw) {
    diffMethInput.conf1 = [...group2.conf1, ...group1.conf1];
    diffMethInput.conf1_mode = param.tw.q.mode;
    if (new Set(diffMethInput.conf1).size === 1) {
      throw new Error("Confounding variable 1 has only one value");
    }
  }
  if (param.tw2) {
    diffMethInput.conf2 = [...group2.conf2, ...group1.conf2];
    diffMethInput.conf2_mode = param.tw2.q.mode;
    if (new Set(diffMethInput.conf2).size === 1) {
      throw new Error("Confounding variable 2 has only one value");
    }
  }

  const time1 = Date.now();
  const result = JSON.parse(await run_R("diffMeth.R", JSON.stringify(diffMethInput)));
  mayLog("Time taken to run diffMeth:", formatElapsedTime(Date.now() - time1));
  return {
    data: result.promoter_data,
    sample_size1,
    sample_size2
  };
}

// Collect usable sample names and their confounder values for one group.
function collectDiffMethGroup(values, param, ds, q, term_results, term_results2) {
  const names = [];
  const conf1 = [];
  const conf2 = [];
  for (const s of values) {
    if (!Number.isInteger(s.sampleId)) continue;
    const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
    if (!n) continue;
    if (!q.allSampleSet.has(n)) continue;
    // A sample must be annotated for every confounder that was requested.
    if (param.tw && !term_results.samples[s.sampleId]) continue;
    if (param.tw2 && !term_results2.samples[s.sampleId]) continue;
    if (param.tw) conf1.push(getConfounderValue(param.tw, term_results, s.sampleId));
    if (param.tw2) conf2.push(getConfounderValue(param.tw2, term_results2, s.sampleId));
    names.push(n);
  }
  return { names, conf1, conf2 };
}

// Read one sample's confounder: `value` for continuous mode, `key` otherwise.
function getConfounderValue(tw, termResults, sampleId) {
  const annotation = termResults.samples[sampleId][tw.$id];
  return tw.q.mode == "continuous" ? annotation["value"] : annotation["key"];
}
170
/**
 * Check two sample groups before analysis and describe any problems found.
 *
 * @param {number} sample_size1 - number of usable samples in group 1
 * @param {number} sample_size2 - number of usable samples in group 2
 * @param {Array} group1names - sample names in group 1
 * @param {Array} group2names - sample names in group 2
 * @returns {string[]} alert messages; empty when both groups are non-empty
 *   and share no sample name
 */
function validateGroups(sample_size1, sample_size2, group1names, group2names) {
  const alerts = [];
  if (sample_size1 < 1) alerts.push("sample size of group1 < 1");
  if (sample_size2 < 1) alerts.push("sample size of group2 < 1");
  // Build the lookup once so the overlap check is linear in the group sizes
  // rather than scanning group 2 for every name in group 1.
  const group2lookup = new Set(group2names);
  const commonnames = group1names.filter((x) => group2lookup.has(x));
  if (commonnames.length) alerts.push(`Common elements found between both groups: ${commonnames.join(", ")}`);
  return alerts;
}
178
+ export {
179
+ api
180
+ };