@sjcrh/proteinpaint-server 2.187.0 → 2.188.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { ChatPayload } from "#types/checkers";
2
2
  import { mayLog } from "#src/helpers.ts";
3
3
  import { formatElapsedTime } from "#shared";
4
- import { readJSONFile, parse_geneset_db } from "./chat/utils.ts";
4
+ import { readJSONFile, parse_geneset_db, getChatRelatedPlotTypes } from "./chat/utils.ts";
5
5
  import { classifyQuery } from "./chat/classify1.ts";
6
6
  import { classifyPlotType } from "./chat/plot.ts";
7
7
  import { classifyNotPlot } from "./chat/classify2.ts";
@@ -11,6 +11,7 @@ import { getDsAllowedTermTypes } from "./termdb.config.ts";
11
11
  import { phrase2entity } from "./chat/phrase2entity.ts";
12
12
  import { inferTermObjFromEntity } from "./chat/entity2termObj.ts";
13
13
  import { resolveToTwTvs } from "./chat/entity2twTvs.ts";
14
+ import { answerDataQueries } from "./chat/dataQueries.ts";
14
15
  import path from "path";
15
16
  import fs from "fs";
16
17
  import { resolveToPlotState } from "./chat/scaffold2state.ts";
@@ -31,9 +32,9 @@ function init({ genomes }) {
31
32
  return async (req, res) => {
32
33
  const q = req.query;
33
34
  try {
34
- const g = genomes[q.genome];
35
- if (!g) throw "invalid genome";
36
- const ds = g.datasets?.[q.dslabel];
35
+ const genome = genomes[q.genome];
36
+ if (!genome) throw "invalid genome";
37
+ const ds = genome.datasets?.[q.dslabel];
37
38
  if (!ds) throw "invalid dslabel";
38
39
  const aiFilesDir = serverconfig.binpath + "/../../dataset/ai/" + q.dslabel;
39
40
  let agentFiles = [];
@@ -49,14 +50,24 @@ function init({ genomes }) {
49
50
  if (llm.provider !== "SJ" && llm.provider !== "ollama" && llm.provider !== "huggingface" && llm.provider !== "azure") {
50
51
  throw "llm.provider must be 'SJ', 'ollama', 'huggingface', or 'azure'";
51
52
  }
52
- const rawFilter = typeof q.filter === "string" ? JSON.parse(q.filter) : q.filter;
53
+ let rawFilter;
54
+ if (typeof q.filter === "string") {
55
+ try {
56
+ rawFilter = JSON.parse(q.filter);
57
+ } catch (e) {
58
+ throw new Error("Failed to parse filter JSON string: " + e);
59
+ }
60
+ } else {
61
+ rawFilter = q.filter;
62
+ }
53
63
  const filter = rawFilter && typeof rawFilter === "object" ? rawFilter : {};
54
64
  const lst = Array.isArray(filter.lst) ? filter.lst : [];
55
65
  const cohortFilter = lst.find((item) => item.tag === "cohortFilter");
56
66
  const cohortKey = cohortFilter ? cohortFilter.tvs.values[0].key : "";
57
- const supportedChartTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
58
- const genedb = serverconfig.tpmasterdir + "/" + g.genedb.dbfile;
59
- const _allowedTermTypes = getDsAllowedTermTypes(ds);
67
+ const supportedPlotTypes = ds.cohort.termdb.q?.getSupportedChartTypes(req)?.[cohortKey];
68
+ const chatSupportedPlotTypes = getChatRelatedPlotTypes(supportedPlotTypes);
69
+ const genedb = serverconfig.tpmasterdir + "/" + genome.genedb.dbfile;
70
+ const allowedTermTypes = getDsAllowedTermTypes(ds);
60
71
  const ai_output_json = await run_chat_pipeline(
61
72
  q.prompt,
62
73
  llm,
@@ -64,8 +75,9 @@ function init({ genomes }) {
64
75
  genedb,
65
76
  agentFiles,
66
77
  aiFilesDir,
67
- supportedChartTypes,
68
- _allowedTermTypes
78
+ chatSupportedPlotTypes,
79
+ allowedTermTypes,
80
+ genome
69
81
  // testing
70
82
  );
71
83
  mayLog("From init: Final AI output JSON:", JSON.stringify(ai_output_json));
@@ -76,19 +88,19 @@ function init({ genomes }) {
76
88
  }
77
89
  };
78
90
  }
79
- async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedChartTypes, _allowedTermTypes) {
91
+ async function run_chat_pipeline(userPrompt, llm, ds, genedb, agentFiles, aiFilesDir, supportedPlotTypes, allowedTermTypes, genome) {
80
92
  if (!fs.existsSync(path.join(aiFilesDir, "main.json")))
81
93
  throw "Main data file is not specified for dataset:" + ds.label;
82
94
  const dataset_json = await readJSONFile(path.join(aiFilesDir, "main.json"));
83
95
  const time1 = (/* @__PURE__ */ new Date()).valueOf();
84
- const class_response = await classifyQuery(user_prompt, llm);
96
+ const class_response = await classifyQuery(userPrompt, llm);
85
97
  mayLog("Time taken for classification:", formatElapsedTime(Date.now() - time1));
86
98
  let ai_output_json;
87
- if (class_response.type == "notplot") {
99
+ if (class_response.type === "notplot") {
88
100
  const time2 = (/* @__PURE__ */ new Date()).valueOf();
89
- const notPlotResult = await classifyNotPlot(user_prompt, llm, agentFiles, aiFilesDir);
101
+ const notPlotResult = await classifyNotPlot(userPrompt, llm, agentFiles, aiFilesDir);
90
102
  mayLog("Time taken for classify2:", formatElapsedTime(Date.now() - time2));
91
- if (notPlotResult.type == "html") {
103
+ if (notPlotResult.type === "html") {
92
104
  ai_output_json = notPlotResult;
93
105
  } else {
94
106
  ai_output_json = {
@@ -96,70 +108,72 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
96
108
  text: "Your query does not appear to be related to the available data visualizations. Please try rephrasing your question."
97
109
  };
98
110
  }
99
- } else if (class_response.type == "plot") {
111
+ } else if (class_response.type === "binaryQuery") {
112
+ const answer = await answerDataQueries(userPrompt, llm, allowedTermTypes);
113
+ if (!answer) throw "Couldn't decide if this is data related query!";
114
+ mayLog("Data Binary Query: ", answer);
115
+ ai_output_json = answer;
116
+ } else if (class_response.type === "plot") {
100
117
  let time = (/* @__PURE__ */ new Date()).valueOf();
101
- const plotType = await classifyPlotType(user_prompt, llm);
118
+ const plotType = await classifyPlotType(userPrompt, llm);
102
119
  mayLog("Time taken to classify plot type:", formatElapsedTime(Date.now() - time));
103
- if (!supportedChartTypes) {
104
- const errorMsg = "Supported chart types list is undefined. Please check the dataset configuration and ensure that getSupportedChartTypes is implemented correctly. Skipping chart type validation, but this may lead to unsupported chart type errors downstream.";
105
- console.warn(errorMsg);
106
- const errorResponse = {
120
+ if (!supportedPlotTypes.includes(plotType)) {
121
+ const log = 'Plot type: "' + plotType + '" is not supported.';
122
+ ai_output_json = {
107
123
  type: "text",
108
- text: errorMsg
124
+ text: log
109
125
  };
110
- return errorResponse;
111
- }
112
- if (plotType === "summary") {
113
- if (!supportedChartTypes.includes("dictionary")) {
114
- const log = 'Plot type: "' + plotType + '" is not supported.';
115
- ai_output_json = {
116
- type: "text",
117
- text: log
118
- };
119
- mayLog(log);
120
- return ai_output_json;
121
- }
122
- } else if (plotType === "dge") {
123
- if (!supportedChartTypes.includes("DA")) {
124
- const log = 'Plot type: "' + plotType + '" is not supported.';
125
- ai_output_json = {
126
- type: "text",
127
- text: log
128
- };
129
- mayLog(log);
130
- return ai_output_json;
131
- }
132
- } else {
133
- mayLog(`Supported chart types for this cohort: ${supportedChartTypes}`);
134
- if (!supportedChartTypes.includes(plotType)) {
135
- const log = 'Plot type: "' + plotType + '" is not supported.';
136
- ai_output_json = {
137
- type: "text",
138
- text: log
139
- };
140
- mayLog(log);
141
- return ai_output_json;
142
- }
126
+ mayLog(log);
127
+ return ai_output_json;
143
128
  }
129
+ const genes_list = await parse_geneset_db(genedb);
130
+ mayLog("#################################################");
144
131
  mayLog("####### First phase: Infer Plot Scaffolds #######");
132
+ mayLog("#################################################");
145
133
  time = (/* @__PURE__ */ new Date()).valueOf();
146
- const scaffoldResult = await inferScaffold(user_prompt, plotType, llm);
134
+ const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
135
+ const scaffoldResult = await inferScaffold(
136
+ userPrompt,
137
+ plotType,
138
+ llm,
139
+ genome,
140
+ genes_list,
141
+ allowedTermTypes,
142
+ dataset_json,
143
+ ds,
144
+ dataset_db
145
+ );
147
146
  mayLog("ScaffoldResult: ", scaffoldResult);
147
+ if (plotType === "hiercluster" && "plot" in scaffoldResult && scaffoldResult.type === "plot" || "text" in scaffoldResult && scaffoldResult.type === "text") {
148
+ return scaffoldResult;
149
+ }
148
150
  mayLog("Time taken to infer scaffold:", formatElapsedTime(Date.now() - time));
149
151
  if (!scaffoldResult)
150
152
  throw "Scaffold result is empty or undefined, which is unexpected. Please check the inferScaffold agent for potential issues.";
153
+ if ("type" in scaffoldResult && scaffoldResult.type === "text") {
154
+ return scaffoldResult;
155
+ }
151
156
  const subplotType = scaffoldResult.plotType === "summary" ? scaffoldResult.chartType : void 0;
157
+ mayLog("#################################################");
152
158
  mayLog("####### Second phase: From Scaffolds's phrases infer Entities #######");
153
- const genes_list = await parse_geneset_db(genedb);
159
+ mayLog("#################################################");
154
160
  time = (/* @__PURE__ */ new Date()).valueOf();
155
- const phrase2entityResult = await phrase2entity(scaffoldResult, plotType, llm, genes_list, dataset_json, ds);
161
+ const phrase2entityResult = await phrase2entity(
162
+ scaffoldResult,
163
+ plotType,
164
+ llm,
165
+ genes_list,
166
+ dataset_json,
167
+ ds
168
+ );
156
169
  mayLog("Time taken to phrase 2 entity:", formatElapsedTime(Date.now() - time));
157
170
  if ("type" in phrase2entityResult && phrase2entityResult.type === "text") {
158
171
  return phrase2entityResult;
159
172
  }
160
173
  mayLog(phrase2entityResult);
174
+ mayLog("#################################################");
161
175
  mayLog("####### Third phase: From Entities infer Term Objects #######");
162
- const dataset_db = serverconfig.tpmasterdir + "/" + ds.cohort.db.file;
176
+ mayLog("#################################################");
163
177
  time = (/* @__PURE__ */ new Date()).valueOf();
164
178
  const termObj = await inferTermObjFromEntity(
165
179
  phrase2entityResult,
@@ -170,7 +184,9 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
170
184
  );
171
185
  mayLog("Time taken to infer term objects:", formatElapsedTime(Date.now() - time));
172
186
  mayLog("Inferred termObj from entity:", JSON.stringify(termObj));
187
+ mayLog("#################################################");
173
188
  mayLog("####### Fourth phase: From Term Objects to TwTvs Objects #######");
189
+ mayLog("#################################################");
174
190
  time = (/* @__PURE__ */ new Date()).valueOf();
175
191
  const twTvsObj = await resolveToTwTvs(termObj, plotType, llm, dataset_db);
176
192
  mayLog("Time taken to resolve to TwTvs object from termObj:", formatElapsedTime(Date.now() - time));
@@ -178,7 +194,9 @@ async function run_chat_pipeline(user_prompt, llm, ds, genedb, agentFiles, aiFil
178
194
  return twTvsObj;
179
195
  }
180
196
  mayLog("twTvsObj:", twTvsObj);
197
+ mayLog("#################################################");
181
198
  mayLog("####### Fifth/Final phase: From TwTvs Objects to Plot States #######");
199
+ mayLog("#################################################");
182
200
  time = (/* @__PURE__ */ new Date()).valueOf();
183
201
  ai_output_json = resolveToPlotState(twTvsObj, plotType, subplotType);
184
202
  mayLog("Time taken to resolve to plot state:", formatElapsedTime(Date.now() - time));
@@ -83,7 +83,10 @@ async function getResult(q, ds) {
83
83
  ({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
84
84
  } else if (q.dataType == PROTEOME_ABUNDANCE) {
85
85
  ;
86
- ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get(_q));
86
+ ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get({
87
+ ..._q,
88
+ dataTypeDetails: _q.proteomeDetails
89
+ }));
87
90
  } else {
88
91
  ;
89
92
  ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries[q.dataType].get(_q, ds));
@@ -225,6 +225,9 @@ function addNonDictionaryQueries(c, ds, genome) {
225
225
  JSON.stringify(src.caseFilter)
226
226
  );
227
227
  }
228
+ if (src.DAPfile) {
229
+ q2.proteome.organisms[organism].assays[assay].cohorts[cohort].DAPfile = true;
230
+ }
228
231
  }
229
232
  }
230
233
  }
@@ -0,0 +1,80 @@
1
+ import path from "path";
2
+ import fs from "fs/promises";
3
+ import { dapVolcanoPayload } from "#types/checkers";
4
+ import { get_ds_tdb } from "#src/termdb.js";
5
+ import { renderVolcano } from "../src/renderVolcano.ts";
6
+ import serverconfig from "../src/serverconfig.js";
7
+ import { countDistinctSamples } from "./termdb.proteome.ts";
8
+ const api = {
// Route descriptor exported by this module (see the export list at the end of the file).
// GET and POST are configured identically: each spreads `dapVolcanoPayload` and
// uses the same `init` handler factory.
9
+ endpoint: "termdb/dapVolcano",
10
+ methods: {
11
+ get: {
12
+ ...dapVolcanoPayload,
13
+ init
14
+ },
15
+ post: {
16
+ ...dapVolcanoPayload,
17
+ init
18
+ }
19
+ }
20
+ };
21
+ function init({ genomes }) {
// Handler factory for the dapVolcano route.
// Takes the `genomes` lookup and returns an async (req, res) handler that:
//   1. resolves genome -> dataset -> proteome -> organism -> assay -> cohort config,
//   2. counts distinct control and case samples for that cohort,
//   3. unless `q.countsOnly` is set, parses the cohort's DAP file and sends the
//      renderVolcano() result together with both sample counts.
// Any failure is caught and reported as { status: "error", error } in the response body.
22
+ return async (req, res) => {
23
+ try {
24
+ const q = req.query;
25
+ const genome = genomes[q.genome];
26
+ if (!genome) throw "invalid genome";
27
+ const [ds] = get_ds_tdb(genome, q);
// Walk the nested proteome config one level at a time so each missing level
// produces its own error message.
28
+ const proteomeConfig = ds.queries?.proteome;
29
+ if (!proteomeConfig) throw "proteome not configured for this dataset";
30
+ const organismConfig = proteomeConfig.organisms?.[q.organism];
31
+ if (!organismConfig) throw "invalid organism";
32
+ const assayConfig = organismConfig.assays?.[q.assay];
33
+ if (!assayConfig) throw "invalid assay";
34
+ const cohortConfig = assayConfig.cohorts?.[q.cohort];
35
+ if (!cohortConfig) throw "invalid cohort";
36
+ if (!cohortConfig.DAPfile) throw "DAP file not configured for this cohort";
// Organism and assay each contribute one { columnIdx, columnValue } filter; they are
// combined with the cohort's own controlFilter / caseFilter arrays for the two counts.
37
+ const organismFilter = [{ columnIdx: organismConfig.columnIdx, columnValue: organismConfig.columnValue }];
38
+ const assayFilter = [{ columnIdx: assayConfig.columnIdx, columnValue: assayConfig.columnValue }];
39
+ const db = proteomeConfig.db;
40
+ const controlCount = countDistinctSamples(db, [...organismFilter, ...assayFilter, ...cohortConfig.controlFilter]);
41
+ const caseCount = countDistinctSamples(db, [...organismFilter, ...assayFilter, ...cohortConfig.caseFilter]);
// Counts-only requests return before the DAP file is read.
42
+ if (q.countsOnly) {
43
+ res.send({ sample_size1: controlCount, sample_size2: caseCount });
44
+ return;
45
+ }
46
+ const filePath = path.join(serverconfig.tpmasterdir, cohortConfig.DAPfile);
47
+ const content = await fs.readFile(filePath, "utf8");
// NOTE(review): splitting on "\n" only would leave a trailing "\r" on each row of a
// CRLF file; confirm the DAP files are LF-terminated.
48
+ const lines = content.trim().split("\n");
49
+ const rustRows = [];
// The loop starts at index 1, so the first line is never parsed (presumably a header row; confirm).
50
+ for (let i = 1; i < lines.length; i++) {
// NOTE(review): the delimiter appears as a single space in this rendering; confirm
// against the published package whether it is actually a tab character.
51
+ const parts = lines[i].split(" ");
// Rows with fewer than 4 fields, or with a non-finite fold change or p-value, are skipped silently.
52
+ if (parts.length < 4) continue;
53
+ const fc = Number(parts[2]);
54
+ if (!Number.isFinite(fc)) continue;
55
+ const pValue = Number(parts[3]);
56
+ if (!Number.isFinite(pValue)) continue;
57
+ rustRows.push({
58
+ gene_name: parts[0],
59
+ gene: parts[1],
60
+ fold_change: fc,
61
+ original_p_value: pValue,
// adjusted_p_value duplicates the single p-value column here and is deleted from
// every rendered dot below (presumably renderVolcano expects the field; confirm).
62
+ adjusted_p_value: pValue
63
+ });
64
+ }
65
+ const rendered = await renderVolcano(rustRows, q.volcanoRender);
66
+ for (const d of rendered.dots) delete d.adjusted_p_value;
67
+ res.send({
68
+ data: rendered,
69
+ sample_size1: controlCount,
70
+ sample_size2: caseCount
71
+ });
72
+ } catch (e) {
// Thrown strings have no .message, so `e.message || e` covers both strings and Error objects.
73
+ res.send({ status: "error", error: e.message || e });
// Only Error instances that carry a stack are logged; thrown strings are not.
74
+ if (e instanceof Error && e.stack) console.log(e);
75
+ }
76
+ };
77
+ }
78
+ export {
79
+ api
80
+ };
@@ -42,12 +42,12 @@ function init({ genomes }) {
42
42
  term: {
43
43
  name: term.name,
44
44
  type: "proteomeAbundance",
45
- proteomeDetails: details
45
+ dataTypeDetails: details
46
46
  }
47
47
  };
48
48
  const cohortData = await ds.queries.proteome.get({
49
49
  terms: [tw],
50
- proteomeDetails: details,
50
+ dataTypeDetails: details,
51
51
  filter: q.filter,
52
52
  filter0: q.filter0,
53
53
  for: "proteinView",
@@ -249,7 +249,7 @@ async function validate_query_proteome(ds) {
249
249
  const proteins = arg?.proteins;
250
250
  if (!Array.isArray(proteins) || proteins.length == 0) throw "queries.proteome.find arg.proteins[] missing";
251
251
  const matches = /* @__PURE__ */ new Set();
252
- const details = arg?.proteomeDetails || {};
252
+ const details = arg?.dataTypeDetails || {};
253
253
  const organism = details.organism;
254
254
  const assay = details.assay;
255
255
  const cohort = details.cohort;
@@ -257,7 +257,7 @@ async function validate_query_proteome(ds) {
257
257
  const filters = [];
258
258
  if (Object.keys(details).length) {
259
259
  if (!organism || !assay || !cohort)
260
- throw "queries.proteome.find arg.proteomeDetails.{organism,assay,cohort} missing";
260
+ throw "queries.proteome.find arg.dataTypeDetails.{organism,assay,cohort} missing";
261
261
  const organismConfig = q.organisms?.[organism];
262
262
  if (!organismConfig) throw `queries.proteome.find invalid organism: ${organism}`;
263
263
  const assayConfig = organismConfig.assays?.[assay];
@@ -300,8 +300,8 @@ async function validate_query_proteome(ds) {
300
300
  };
301
301
  q.get = async (param) => {
302
302
  if (!param?.terms?.length) throw "queries.proteome.get param.terms[] missing";
303
- if (!param.proteomeDetails?.assay || !param.proteomeDetails?.cohort || !param.proteomeDetails?.organism)
304
- throw "queries.proteome.get param.proteomeDetails.{assay,cohort,organism} missing";
303
+ if (!param.dataTypeDetails?.assay || !param.dataTypeDetails?.cohort || !param.dataTypeDetails?.organism)
304
+ throw "queries.proteome.get param.dataTypeDetails.{assay,cohort,organism} missing";
305
305
  return await getProteomeValuesFromCohort(ds, param, q);
306
306
  };
307
307
  }
@@ -329,6 +329,12 @@ function buildFilterClause(filters) {
329
329
  }
330
330
  return { conditions, params };
331
331
  }
332
+ function countDistinctSamples(db, filters) {
// Returns the number of distinct `sample` values in `proteome_abundance` that match
// every filter in `filters`.
//   db      - handle exposing prepare(sql).get(...params)
//   filters - non-empty array passed unchanged to buildFilterClause()
// Throws a string when `filters` is missing or empty, so the query never runs with an
// empty WHERE clause.
333
+ if (!filters?.length) throw "countDistinctSamples: filters must not be empty";
334
+ const { conditions, params } = buildFilterClause(filters);
// `conditions` are joined with AND into the SQL text; `params` are passed separately to .get().
335
+ const row = db.prepare(`SELECT COUNT(DISTINCT sample) as cnt FROM proteome_abundance WHERE ${conditions.join(" AND ")}`).get(...params);
// Falls back to 0 when no row comes back or the count is falsy.
336
+ return row?.cnt || 0;
337
+ }
332
338
  function queryDbRows(db, matchColumn, matchValue, filters) {
333
339
  const { conditions, params } = buildFilterClause(filters);
334
340
  const allConditions = [`${matchColumn} = ? COLLATE NOCASE`, ...conditions];
@@ -339,7 +345,7 @@ function queryDbRows(db, matchColumn, matchValue, filters) {
339
345
  }
340
346
  async function getProteomeValuesFromCohort(ds, param, q) {
341
347
  const db = ds.queries.proteome.db;
342
- const { assay, cohort, organism } = param.proteomeDetails;
348
+ const { assay, cohort, organism } = param.dataTypeDetails;
343
349
  const organismConfig = q.organisms?.[organism];
344
350
  if (!organismConfig) throw `queries.proteome invalid organism: ${organism}`;
345
351
  const organismColumnIdx = organismConfig.columnIdx;
@@ -453,5 +459,6 @@ async function getProteomeValuesFromCohort(ds, param, q) {
453
459
  }
454
460
  export {
455
461
  api,
462
+ countDistinctSamples,
456
463
  validate_query_proteome
457
464
  };
@@ -53,6 +53,10 @@ function init({ genomes }) {
53
53
  cohortSamples = tmp[0];
54
54
  } else {
55
55
  if (!q.plotName) throw new Error("Neither plot name or coordinates where provided");
56
+ if (typeof ds.cohort?.scatterplots?.get == "function") {
57
+ const allowed = ds.cohort.scatterplots.get(q.__protected__?.clientAuthResult);
58
+ if (!allowed?.find((i) => i.name == q.plotName)) throw new Error("No permission to display plot");
59
+ }
56
60
  if (!Array.isArray(ds.cohort?.scatterplots?.plots)) throw new Error("not supported");
57
61
  const plot = ds.cohort.scatterplots.plots.find((p) => p.name == q.plotName);
58
62
  if (!plot) throw new Error(`plot not found with plotName ${q.plotName}`);
@@ -89,6 +89,17 @@ async function validateSamples(q, ds) {
89
89
  const S = q.samples, D = q.data;
90
90
  const samples = /* @__PURE__ */ new Map();
91
91
  for (const plot of D.plots) {
92
+ if (plot.isMetaResult) {
93
+ const sampleName = plot?.sampleId || plot.name.replace(/\s/g, "_");
94
+ const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sampleName + (plot.fileSuffix || ""));
95
+ try {
96
+ await file_is_readable(tsvfile);
97
+ samples.set(sampleName, { sample: sampleName, isMetaResult: true });
98
+ } catch (e) {
99
+ throw new Error(`meta result data file missing or unreadable: ${sampleName} (${tsvfile}): ${e.message || e}`);
100
+ }
101
+ continue;
102
+ }
92
103
  for (const fn of await fs.promises.readdir(path.join(serverconfig.tpmasterdir, plot.folder))) {
93
104
  let sampleName = fn;
94
105
  if (plot.fileSuffix) {
@@ -1,11 +1,7 @@
1
1
  import { termdbTopVariablyExpressedGenesPayload } from "#types/checkers";
2
- import { run_rust } from "@sjcrh/proteinpaint-rust";
3
- import serverconfig from "#src/serverconfig.js";
4
2
  import { mayLimitSamples } from "#src/mds3.filter.js";
5
- import { makeFilter } from "#src/mds3.gdc.js";
6
- import { cachedFetch } from "#src/utils.js";
7
- import { joinUrl } from "#shared/joinUrl.js";
8
- import { formatElapsedTime } from "#shared/time.js";
3
+ import { run_python } from "@sjcrh/proteinpaint-python";
4
+ import { mayLog } from "#src/helpers.ts";
9
5
  const api = {
10
6
  endpoint: "termdb/topVariablyExpressedGenes",
11
7
  methods: {
@@ -29,27 +25,23 @@ function init({ genomes }) {
29
25
  const ds = genome.datasets?.[q.dslabel];
30
26
  if (!ds) throw "invalid dslabel";
31
27
  if (!ds.queries?.topVariablyExpressedGenes) throw "not supported on dataset";
28
+ q.ds = ds;
32
29
  const t = Date.now();
33
30
  result = {
34
31
  genes: await ds.queries.topVariablyExpressedGenes.getGenes(q)
35
32
  };
36
- console.log("compute top variably expressed genes:", formatElapsedTime(Date.now() - t));
33
+ mayLog("time for top variably expressed genes", Date.now() - t);
37
34
  } catch (e) {
38
35
  result = { status: e.status || 400, error: e.message || e };
39
36
  }
40
37
  res.send(result);
41
38
  };
42
39
  }
43
- function validate_query_TopVariablyExpressedGenes(ds, genome) {
40
+ function validate_query_TopVariablyExpressedGenes(ds) {
// Checks the dataset's `queries.topVariablyExpressedGenes` config and returns without
// doing anything when none is present.
// Rows marked "-" are the 2.187.0 body; rows marked "+" are the 2.188.0 body.
44
41
  const q = ds.queries.topVariablyExpressedGenes;
45
42
  if (!q) return;
46
- if (q.src == "gdcapi") {
47
- gdcValidateQuery(ds, genome);
48
- } else if (q.src == "native") {
49
- nativeValidateQuery(ds);
50
- } else {
51
- throw "unknown topVariablyExpressedGenes.src";
52
- }
// 2.187.0 (above): dispatched on q.src ("gdcapi" or "native") and threw on any other value.
43
+ if (typeof q.getGenes == "function") return;
44
+ nativeValidateQuery(ds);
// 2.188.0 (above): a config that already provides getGenes as a function is left as-is;
// every other config goes to nativeValidateQuery(). q.src is no longer read, and the
// `genome` parameter is removed together with the gdcapi branch.
53
45
  }
54
46
  function nativeValidateQuery(ds) {
55
47
  const gE = ds.queries.geneExpression;
@@ -83,21 +75,7 @@ function addTopVEarg(q) {
83
75
  id: "filter_extreme_values",
84
76
  label: "Filter Extreme Values",
85
77
  type: "boolean",
86
- value: true,
87
- options: [
88
- {
89
- id: "min_count",
90
- label: "Min count",
91
- type: "number",
92
- value: 10
93
- },
94
- {
95
- id: "min_total_count",
96
- label: "Min total count",
97
- type: "number",
98
- value: 15
99
- }
100
- ]
78
+ value: true
101
79
  },
102
80
  {
103
81
  id: "rank_type",
@@ -134,124 +112,20 @@ function addTopVEarg(q) {
134
112
  q.arguments = arglst;
135
113
  }
136
114
  async function computeGenes4nativeDs(q, gE, samples) {
// Builds a JSON payload from the request (`q`), the gene-expression config (`gE`) and
// the sample list, hands it to an external helper, and returns the resulting gene list.
// Rows marked "-" are the 2.187.0 body (Rust helper); rows marked "+" are the 2.188.0 body (Python helper).
115
+ if (!["number", "boolean"].includes(typeof q.filter_extreme_values) || q.filter_extreme_values === void 0) {
116
+ q.filter_extreme_values = false;
117
+ }
// 2.188.0: any filter_extreme_values that is not a number or boolean is reset to false
// on `q` itself. The `=== void 0` test is redundant, because typeof undefined is
// "undefined" and is already rejected by the includes() check.
137
118
  const input_json = {
138
119
  input_file: gE.file,
139
120
  samples: samples.join(","),
140
- filter_extreme_values: q.filter_extreme_values,
141
- num_genes: q.maxGenes,
142
- rank_type: q.rank_type?.type
121
+ filter_extreme_values: typeof q.filter_extreme_values === "number" ? Boolean(q.filter_extreme_values) : q.filter_extreme_values,
122
+ max_genes: q.maxGenes,
123
+ rank_type: q.rank_type?.type ?? "var"
// 2.188.0: numeric flags are converted to booleans, the payload key `num_genes` becomes
// `max_genes`, and rank_type falls back to "var" when q.rank_type?.type is null or undefined.
143
124
  };
144
- if (q.filter_extreme_values == 1) {
145
- input_json["min_count"] = q.min_count;
146
- input_json["min_total_count"] = q.min_total_count;
147
- }
148
- if (gE.newformat) {
149
- input_json["newformat"] = true;
150
- }
151
- const rust_output = await run_rust("topGeneByExpressionVariance", JSON.stringify(input_json));
152
- const rust_output_list = rust_output.split("\n");
153
- let output_json;
154
- for (const item of rust_output_list) {
155
- if (item.includes("output_json:")) {
156
- output_json = JSON.parse(item.replace("output_json:", ""));
157
- } else {
158
- console.log(item);
159
- }
160
- }
161
- const varGenes = output_json.map((i) => i.gene_symbol);
// 2.187.0 (above): also sent min_count / min_total_count / newformat, scanned the Rust
// output for an "output_json:" line, and mapped each entry to its gene_symbol.
125
+ const python_output = await run_python("topVEgene.py", JSON.stringify(input_json));
126
+ const varGenes = typeof python_output === "string" ? JSON.parse(python_output) : [];
// 2.188.0 (above): the helper's output is parsed as JSON when it is a string; any
// non-string output yields an empty list. min_count, min_total_count and newformat are
// no longer sent.
// NOTE(review): presumably topVEgene.py prints a JSON array of gene names; confirm against the script.
162
127
  return varGenes;
163
128
  }
164
- function gdcValidateQuery(ds, genome) {
165
- ds.queries.topVariablyExpressedGenes.getGenes = async (q) => {
166
- if (serverconfig.features.gdcGenes) {
167
- console.error(
168
- "!!GDC!! using serverconfig.features.gdcGenes[] but not live api query. only use this on DEV and never on PROD!"
169
- );
170
- return serverconfig.features.gdcGenes;
171
- }
172
- if (ds.label === "GDC" && !ds.__gdc?.doneCaching) {
173
- throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
174
- }
175
- const { host, headers } = ds.getHostHeaders(q);
176
- try {
177
- const response = await cachedFetch(
178
- joinUrl(host.rest, "/gene_expression/gene_selection"),
179
- {
180
- method: "POST",
181
- headers,
182
- body: getGeneSelectionArg(q)
183
- },
184
- {
185
- // noCache: true, // !!! for testing only !!!
186
- getErrMessage: (response2) => {
187
- const body = response2?.body || response2;
188
- return Array.isArray(body?.gene_selection) ? "" : body?.message || body?.error || JSON.stringify(body);
189
- }
190
- }
191
- );
192
- const re = response.body;
193
- const genes = [];
194
- if (!Array.isArray(re.gene_selection)) {
195
- throw "re.gene_selection[] is not array: " + JSON.stringify(re);
196
- }
197
- for (const i of re.gene_selection) {
198
- if (i.gene_id && typeof i.gene_id == "string") {
199
- const t = genome.genedb.getNameByAlias.get(i.gene_id);
200
- if (t) genes.push(t.name);
201
- } else if (i.symbol && typeof i.symbol == "string") {
202
- genes.push(i.symbol);
203
- } else {
204
- throw "one of re.gene_selection[] is missing both gene_id and symbol";
205
- }
206
- }
207
- return genes;
208
- } catch (e) {
209
- console.error(e.stack || e);
210
- throw e;
211
- }
212
- };
213
- function getGeneSelectionArg(q) {
214
- const arg = {
215
- // add any to avoid tsc err
216
- case_filters: makeFilter(q),
217
- selection_size: q.maxGenes,
218
- min_median_log2_uqfpkm: q.min_median_log2_uqfpkm
219
- };
220
- if (q.geneSet) {
221
- if (q.geneSet.type == "all") {
222
- arg.gene_type = "protein_coding";
223
- } else if (q.geneSet.type == "custom" || q.geneSet.type == "msigdb") {
224
- if (!Array.isArray(q.geneSet.geneList)) throw "q.geneSet.geneList is not array";
225
- arg.gene_ids = map2ensg(q.geneSet.geneList, genome);
226
- if (arg.gene_ids.length == 0) throw "no valid genes from custom gene set";
227
- } else {
228
- throw "unknown q.geneSet.type";
229
- }
230
- } else {
231
- arg.gene_type = "protein_coding";
232
- }
233
- return arg;
234
- }
235
- }
236
- function map2ensg(lst, genome) {
237
- const ensg = [];
238
- for (const name of lst) {
239
- if (name.startsWith("ENSG") && name.length == 15) {
240
- ensg.push(name);
241
- continue;
242
- }
243
- const tmp = genome.genedb.getAliasByName.all(name);
244
- if (Array.isArray(tmp)) {
245
- for (const a of tmp) {
246
- if (a.alias.startsWith("ENSG")) {
247
- ensg.push(a.alias);
248
- break;
249
- }
250
- }
251
- }
252
- }
253
- return ensg;
254
- }
255
129
  export {
256
130
  api,
257
131
  validate_query_TopVariablyExpressedGenes