@sjcrh/proteinpaint-server 2.186.0 → 2.188.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,12 +42,12 @@ function init({ genomes }) {
42
42
  term: {
43
43
  name: term.name,
44
44
  type: "proteomeAbundance",
45
- proteomeDetails: details
45
+ dataTypeDetails: details
46
46
  }
47
47
  };
48
48
  const cohortData = await ds.queries.proteome.get({
49
49
  terms: [tw],
50
- proteomeDetails: details,
50
+ dataTypeDetails: details,
51
51
  filter: q.filter,
52
52
  filter0: q.filter0,
53
53
  for: "proteinView",
@@ -64,6 +64,7 @@ function init({ genomes }) {
64
64
  entry.testedN = stats.testedN;
65
65
  entry.controlN = stats.controlN;
66
66
  if (assay.mclassOverride) entry.mclassOverride = assay.mclassOverride;
67
+ if (organism.genomeName) entry.genomeName = organism.genomeName;
67
68
  cohorts.push(entry);
68
69
  }
69
70
  }
@@ -248,7 +249,7 @@ async function validate_query_proteome(ds) {
248
249
  const proteins = arg?.proteins;
249
250
  if (!Array.isArray(proteins) || proteins.length == 0) throw "queries.proteome.find arg.proteins[] missing";
250
251
  const matches = /* @__PURE__ */ new Set();
251
- const details = arg?.proteomeDetails || {};
252
+ const details = arg?.dataTypeDetails || {};
252
253
  const organism = details.organism;
253
254
  const assay = details.assay;
254
255
  const cohort = details.cohort;
@@ -256,7 +257,7 @@ async function validate_query_proteome(ds) {
256
257
  const filters = [];
257
258
  if (Object.keys(details).length) {
258
259
  if (!organism || !assay || !cohort)
259
- throw "queries.proteome.find arg.proteomeDetails.{organism,assay,cohort} missing";
260
+ throw "queries.proteome.find arg.dataTypeDetails.{organism,assay,cohort} missing";
260
261
  const organismConfig = q.organisms?.[organism];
261
262
  if (!organismConfig) throw `queries.proteome.find invalid organism: ${organism}`;
262
263
  const assayConfig = organismConfig.assays?.[assay];
@@ -299,8 +300,8 @@ async function validate_query_proteome(ds) {
299
300
  };
300
301
  q.get = async (param) => {
301
302
  if (!param?.terms?.length) throw "queries.proteome.get param.terms[] missing";
302
- if (!param.proteomeDetails?.assay || !param.proteomeDetails?.cohort || !param.proteomeDetails?.organism)
303
- throw "queries.proteome.get param.proteomeDetails.{assay,cohort,organism} missing";
303
+ if (!param.dataTypeDetails?.assay || !param.dataTypeDetails?.cohort || !param.dataTypeDetails?.organism)
304
+ throw "queries.proteome.get param.dataTypeDetails.{assay,cohort,organism} missing";
304
305
  return await getProteomeValuesFromCohort(ds, param, q);
305
306
  };
306
307
  }
@@ -328,6 +329,12 @@ function buildFilterClause(filters) {
328
329
  }
329
330
  return { conditions, params };
330
331
  }
332
+ function countDistinctSamples(db, filters) {
333
+ if (!filters?.length) throw "countDistinctSamples: filters must not be empty";
334
+ const { conditions, params } = buildFilterClause(filters);
335
+ const row = db.prepare(`SELECT COUNT(DISTINCT sample) as cnt FROM proteome_abundance WHERE ${conditions.join(" AND ")}`).get(...params);
336
+ return row?.cnt || 0;
337
+ }
331
338
  function queryDbRows(db, matchColumn, matchValue, filters) {
332
339
  const { conditions, params } = buildFilterClause(filters);
333
340
  const allConditions = [`${matchColumn} = ? COLLATE NOCASE`, ...conditions];
@@ -338,7 +345,7 @@ function queryDbRows(db, matchColumn, matchValue, filters) {
338
345
  }
339
346
  async function getProteomeValuesFromCohort(ds, param, q) {
340
347
  const db = ds.queries.proteome.db;
341
- const { assay, cohort, organism } = param.proteomeDetails;
348
+ const { assay, cohort, organism } = param.dataTypeDetails;
342
349
  const organismConfig = q.organisms?.[organism];
343
350
  if (!organismConfig) throw `queries.proteome invalid organism: ${organism}`;
344
351
  const organismColumnIdx = organismConfig.columnIdx;
@@ -452,5 +459,6 @@ async function getProteomeValuesFromCohort(ds, param, q) {
452
459
  }
453
460
  export {
454
461
  api,
462
+ countDistinctSamples,
455
463
  validate_query_proteome
456
464
  };
@@ -53,6 +53,10 @@ function init({ genomes }) {
53
53
  cohortSamples = tmp[0];
54
54
  } else {
55
55
  if (!q.plotName) throw new Error("Neither plot name or coordinates where provided");
56
+ if (typeof ds.cohort?.scatterplots?.get == "function") {
57
+ const allowed = ds.cohort.scatterplots.get(q.__protected__?.clientAuthResult);
58
+ if (!allowed?.find((i) => i.name == q.plotName)) throw new Error("No permission to display plot");
59
+ }
56
60
  if (!Array.isArray(ds.cohort?.scatterplots?.plots)) throw new Error("not supported");
57
61
  const plot = ds.cohort.scatterplots.plots.find((p) => p.name == q.plotName);
58
62
  if (!plot) throw new Error(`plot not found with plotName ${q.plotName}`);
@@ -89,6 +89,17 @@ async function validateSamples(q, ds) {
89
89
  const S = q.samples, D = q.data;
90
90
  const samples = /* @__PURE__ */ new Map();
91
91
  for (const plot of D.plots) {
92
+ if (plot.isMetaResult) {
93
+ const sampleName = plot?.sampleId || plot.name.replace(/\s/g, "_");
94
+ const tsvfile = path.join(serverconfig.tpmasterdir, plot.folder, sampleName + (plot.fileSuffix || ""));
95
+ try {
96
+ await file_is_readable(tsvfile);
97
+ samples.set(sampleName, { sample: sampleName, isMetaResult: true });
98
+ } catch (e) {
99
+ throw new Error(`meta result data file missing or unreadable: ${sampleName} (${tsvfile}): ${e.message || e}`);
100
+ }
101
+ continue;
102
+ }
92
103
  for (const fn of await fs.promises.readdir(path.join(serverconfig.tpmasterdir, plot.folder))) {
93
104
  let sampleName = fn;
94
105
  if (plot.fileSuffix) {
@@ -1,11 +1,7 @@
1
1
  import { termdbTopVariablyExpressedGenesPayload } from "#types/checkers";
2
- import { run_rust } from "@sjcrh/proteinpaint-rust";
3
- import serverconfig from "#src/serverconfig.js";
4
2
  import { mayLimitSamples } from "#src/mds3.filter.js";
5
- import { makeFilter } from "#src/mds3.gdc.js";
6
- import { cachedFetch } from "#src/utils.js";
7
- import { joinUrl } from "#shared/joinUrl.js";
8
- import { formatElapsedTime } from "#shared/time.js";
3
+ import { run_python } from "@sjcrh/proteinpaint-python";
4
+ import { mayLog } from "#src/helpers.ts";
9
5
  const api = {
10
6
  endpoint: "termdb/topVariablyExpressedGenes",
11
7
  methods: {
@@ -29,27 +25,23 @@ function init({ genomes }) {
29
25
  const ds = genome.datasets?.[q.dslabel];
30
26
  if (!ds) throw "invalid dslabel";
31
27
  if (!ds.queries?.topVariablyExpressedGenes) throw "not supported on dataset";
28
+ q.ds = ds;
32
29
  const t = Date.now();
33
30
  result = {
34
31
  genes: await ds.queries.topVariablyExpressedGenes.getGenes(q)
35
32
  };
36
- console.log("compute top variably expressed genes:", formatElapsedTime(Date.now() - t));
33
+ mayLog("time for top variably expressed genes", Date.now() - t);
37
34
  } catch (e) {
38
35
  result = { status: e.status || 400, error: e.message || e };
39
36
  }
40
37
  res.send(result);
41
38
  };
42
39
  }
43
- function validate_query_TopVariablyExpressedGenes(ds, genome) {
40
+ function validate_query_TopVariablyExpressedGenes(ds) {
44
41
  const q = ds.queries.topVariablyExpressedGenes;
45
42
  if (!q) return;
46
- if (q.src == "gdcapi") {
47
- gdcValidateQuery(ds, genome);
48
- } else if (q.src == "native") {
49
- nativeValidateQuery(ds);
50
- } else {
51
- throw "unknown topVariablyExpressedGenes.src";
52
- }
43
+ if (typeof q.getGenes == "function") return;
44
+ nativeValidateQuery(ds);
53
45
  }
54
46
  function nativeValidateQuery(ds) {
55
47
  const gE = ds.queries.geneExpression;
@@ -83,21 +75,7 @@ function addTopVEarg(q) {
83
75
  id: "filter_extreme_values",
84
76
  label: "Filter Extreme Values",
85
77
  type: "boolean",
86
- value: true,
87
- options: [
88
- {
89
- id: "min_count",
90
- label: "Min count",
91
- type: "number",
92
- value: 10
93
- },
94
- {
95
- id: "min_total_count",
96
- label: "Min total count",
97
- type: "number",
98
- value: 15
99
- }
100
- ]
78
+ value: true
101
79
  },
102
80
  {
103
81
  id: "rank_type",
@@ -134,124 +112,20 @@ function addTopVEarg(q) {
134
112
  q.arguments = arglst;
135
113
  }
136
114
  async function computeGenes4nativeDs(q, gE, samples) {
115
+ if (!["number", "boolean"].includes(typeof q.filter_extreme_values) || q.filter_extreme_values === void 0) {
116
+ q.filter_extreme_values = false;
117
+ }
137
118
  const input_json = {
138
119
  input_file: gE.file,
139
120
  samples: samples.join(","),
140
- filter_extreme_values: q.filter_extreme_values,
141
- num_genes: q.maxGenes,
142
- rank_type: q.rank_type?.type
121
+ filter_extreme_values: typeof q.filter_extreme_values === "number" ? Boolean(q.filter_extreme_values) : q.filter_extreme_values,
122
+ max_genes: q.maxGenes,
123
+ rank_type: q.rank_type?.type ?? "var"
143
124
  };
144
- if (q.filter_extreme_values == 1) {
145
- input_json["min_count"] = q.min_count;
146
- input_json["min_total_count"] = q.min_total_count;
147
- }
148
- if (gE.newformat) {
149
- input_json["newformat"] = true;
150
- }
151
- const rust_output = await run_rust("topGeneByExpressionVariance", JSON.stringify(input_json));
152
- const rust_output_list = rust_output.split("\n");
153
- let output_json;
154
- for (const item of rust_output_list) {
155
- if (item.includes("output_json:")) {
156
- output_json = JSON.parse(item.replace("output_json:", ""));
157
- } else {
158
- console.log(item);
159
- }
160
- }
161
- const varGenes = output_json.map((i) => i.gene_symbol);
125
+ const python_output = await run_python("topVEgene.py", JSON.stringify(input_json));
126
+ const varGenes = typeof python_output === "string" ? JSON.parse(python_output) : [];
162
127
  return varGenes;
163
128
  }
164
- function gdcValidateQuery(ds, genome) {
165
- ds.queries.topVariablyExpressedGenes.getGenes = async (q) => {
166
- if (serverconfig.features.gdcGenes) {
167
- console.error(
168
- "!!GDC!! using serverconfig.features.gdcGenes[] but not live api query. only use this on DEV and never on PROD!"
169
- );
170
- return serverconfig.features.gdcGenes;
171
- }
172
- if (ds.label === "GDC" && !ds.__gdc?.doneCaching) {
173
- throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
174
- }
175
- const { host, headers } = ds.getHostHeaders(q);
176
- try {
177
- const response = await cachedFetch(
178
- joinUrl(host.rest, "/gene_expression/gene_selection"),
179
- {
180
- method: "POST",
181
- headers,
182
- body: getGeneSelectionArg(q)
183
- },
184
- {
185
- // noCache: true, // !!! for testing only !!!
186
- getErrMessage: (response2) => {
187
- const body = response2?.body || response2;
188
- return Array.isArray(body?.gene_selection) ? "" : body?.message || body?.error || JSON.stringify(body);
189
- }
190
- }
191
- );
192
- const re = response.body;
193
- const genes = [];
194
- if (!Array.isArray(re.gene_selection)) {
195
- throw "re.gene_selection[] is not array: " + JSON.stringify(re);
196
- }
197
- for (const i of re.gene_selection) {
198
- if (i.gene_id && typeof i.gene_id == "string") {
199
- const t = genome.genedb.getNameByAlias.get(i.gene_id);
200
- if (t) genes.push(t.name);
201
- } else if (i.symbol && typeof i.symbol == "string") {
202
- genes.push(i.symbol);
203
- } else {
204
- throw "one of re.gene_selection[] is missing both gene_id and symbol";
205
- }
206
- }
207
- return genes;
208
- } catch (e) {
209
- console.error(e.stack || e);
210
- throw e;
211
- }
212
- };
213
- function getGeneSelectionArg(q) {
214
- const arg = {
215
- // add any to avoid tsc err
216
- case_filters: makeFilter(q),
217
- selection_size: q.maxGenes,
218
- min_median_log2_uqfpkm: q.min_median_log2_uqfpkm
219
- };
220
- if (q.geneSet) {
221
- if (q.geneSet.type == "all") {
222
- arg.gene_type = "protein_coding";
223
- } else if (q.geneSet.type == "custom" || q.geneSet.type == "msigdb") {
224
- if (!Array.isArray(q.geneSet.geneList)) throw "q.geneSet.geneList is not array";
225
- arg.gene_ids = map2ensg(q.geneSet.geneList, genome);
226
- if (arg.gene_ids.length == 0) throw "no valid genes from custom gene set";
227
- } else {
228
- throw "unknown q.geneSet.type";
229
- }
230
- } else {
231
- arg.gene_type = "protein_coding";
232
- }
233
- return arg;
234
- }
235
- }
236
- function map2ensg(lst, genome) {
237
- const ensg = [];
238
- for (const name of lst) {
239
- if (name.startsWith("ENSG") && name.length == 15) {
240
- ensg.push(name);
241
- continue;
242
- }
243
- const tmp = genome.genedb.getAliasByName.all(name);
244
- if (Array.isArray(tmp)) {
245
- for (const a of tmp) {
246
- if (a.alias.startsWith("ENSG")) {
247
- ensg.push(a.alias);
248
- break;
249
- }
250
- }
251
- }
252
- }
253
- return ensg;
254
- }
255
129
  export {
256
130
  api,
257
131
  validate_query_TopVariablyExpressedGenes