@sjcrh/proteinpaint-server 2.184.1-0 → 2.185.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,11 @@
1
- import fs from "fs";
2
1
  import path from "path";
3
2
  import { diffExpPayload } from "#types/checkers";
4
- import { run_rust } from "@sjcrh/proteinpaint-rust";
5
- import { getData } from "../src/termdb.matrix.js";
6
- import { get_ds_tdb } from "../src/termdb.js";
7
- import { run_R } from "@sjcrh/proteinpaint-r";
8
3
  import { mayLog } from "#src/helpers.ts";
9
4
  import serverconfig from "../src/serverconfig.js";
10
- import { imageSize } from "image-size";
11
5
  import { get_header_txt } from "#src/utils.js";
12
- import { formatElapsedTime } from "#shared";
6
+ import { run_rust } from "@sjcrh/proteinpaint-rust";
7
+ import { renderVolcano } from "../src/renderVolcano.ts";
8
+ import { readCacheFileOrRecompute, resolveDeContext, resolveSampleGroups } from "../src/diffAnalysis.ts";
13
9
  const api = {
14
10
  endpoint: "termdb/DE",
15
11
  methods: {
@@ -27,244 +23,41 @@ function init({ genomes }) {
27
23
  return async (req, res) => {
28
24
  try {
29
25
  const q = req.query;
30
- const genome = genomes[q.genome];
31
- if (!genome) throw new Error("invalid genome");
32
- const [ds] = get_ds_tdb(genome, q);
33
- let term_results = [];
34
- if (q.tw) {
35
- const terms = [q.tw];
36
- term_results = await getData(
37
- {
38
- filter: q.filter,
39
- filter0: q.filter0,
40
- terms
41
- },
42
- ds
43
- );
44
- if (term_results.error) throw new Error(term_results.error);
45
- }
46
- let term_results2 = [];
47
- if (q.tw2) {
48
- const terms2 = [q.tw2];
49
- term_results2 = await getData(
50
- {
51
- filter: q.filter,
52
- filter0: q.filter0,
53
- terms: terms2
54
- },
55
- ds
56
- );
57
- if (term_results2.error) throw new Error(term_results2.error);
26
+ if (q.preAnalysis) {
27
+ const { ds, term_results, term_results2 } = await resolveDeContext(q, genomes);
28
+ const groups = resolveSampleGroups(q, ds, term_results, term_results2);
29
+ const group1Name = q.samplelst.groups[0].name;
30
+ const group2Name = q.samplelst.groups[1].name;
31
+ res.send({
32
+ data: {
33
+ [group1Name]: groups.group1names.length,
34
+ [group2Name]: groups.group2names.length,
35
+ ...groups.alerts.length ? { alert: groups.alerts.join(" | ") } : {}
36
+ }
37
+ });
38
+ return;
58
39
  }
59
- const results = await run_DE(req.query, ds, term_results, term_results2);
60
- if (!results || !results.data) throw new Error("No data available");
61
- res.send(results);
40
+ const { cacheId, geneData, sample_size1, sample_size2, method, images, bcv } = await readCacheFileOrRecompute({
41
+ daRequest: q,
42
+ genomes
43
+ });
44
+ const rendered = await renderVolcano(geneData, q.volcanoRender);
45
+ rendered.cacheId = cacheId;
46
+ const output = {
47
+ data: rendered,
48
+ sample_size1,
49
+ sample_size2,
50
+ method,
51
+ images
52
+ };
53
+ if (bcv != null) output.bcv = bcv;
54
+ res.send(output);
62
55
  } catch (e) {
63
56
  res.send({ status: "error", error: e.message || e });
64
57
  if (e instanceof Error && e.stack) console.log(e);
65
58
  }
66
59
  };
67
60
  }
68
- async function run_DE(param, ds, term_results, term_results2) {
69
- if (param.samplelst?.groups?.length != 2) throw new Error(".samplelst.groups.length!=2");
70
- if (param.samplelst.groups[0].values?.length < 1) throw new Error("samplelst.groups[0].values.length<1");
71
- if (param.samplelst.groups[1].values?.length < 1) throw new Error("samplelst.groups[1].values.length<1");
72
- const q = ds.queries.rnaseqGeneCount;
73
- if (!q) return;
74
- if (!q.file) throw new Error("unknown data type for rnaseqGeneCount");
75
- if (!q.storage_type) throw new Error("storage_type is not defined");
76
- param.storage_type = q.storage_type;
77
- const group1names = [];
78
- const conf1_group1 = [];
79
- const conf2_group1 = [];
80
- for (const s of param.samplelst.groups[0].values) {
81
- if (!Number.isInteger(s.sampleId)) continue;
82
- const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
83
- if (!n) continue;
84
- if (q.allSampleSet.has(n)) {
85
- if (param.tw && !param.tw2) {
86
- if (term_results.samples[s.sampleId]) {
87
- if (param.tw.q.mode == "continuous") {
88
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
89
- } else {
90
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
91
- }
92
- group1names.push(n);
93
- }
94
- } else if (!param.tw && param.tw2) {
95
- if (term_results2.samples[s.sampleId]) {
96
- if (param.tw2.q.mode == "continuous") {
97
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
98
- } else {
99
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
100
- }
101
- group1names.push(n);
102
- }
103
- } else if (param.tw && param.tw2) {
104
- if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
105
- if (param.tw.q.mode == "continuous") {
106
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
107
- } else {
108
- conf1_group1.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
109
- }
110
- if (param.tw2.q.mode == "continuous") {
111
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
112
- } else {
113
- conf2_group1.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
114
- }
115
- group1names.push(n);
116
- }
117
- } else {
118
- group1names.push(n);
119
- }
120
- }
121
- }
122
- const group2names = [];
123
- const conf1_group2 = [];
124
- const conf2_group2 = [];
125
- for (const s of param.samplelst.groups[1].values) {
126
- if (!Number.isInteger(s.sampleId)) continue;
127
- const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
128
- if (!n) continue;
129
- if (q.allSampleSet.has(n)) {
130
- if (param.tw && !param.tw2) {
131
- if (term_results.samples[s.sampleId]) {
132
- if (param.tw.q.mode == "continuous") {
133
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
134
- } else {
135
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
136
- }
137
- group2names.push(n);
138
- }
139
- } else if (!param.tw && param.tw2) {
140
- if (term_results2.samples[s.sampleId]) {
141
- if (param.tw2.q.mode == "continuous") {
142
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
143
- } else {
144
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
145
- }
146
- group2names.push(n);
147
- }
148
- } else if (param.tw && param.tw2) {
149
- if (term_results.samples[s.sampleId] && term_results2.samples[s.sampleId]) {
150
- if (param.tw.q.mode == "continuous") {
151
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["value"]);
152
- } else {
153
- conf1_group2.push(term_results.samples[s.sampleId][param.tw.$id]["key"]);
154
- }
155
- if (param.tw2.q.mode == "continuous") {
156
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["value"]);
157
- } else {
158
- conf2_group2.push(term_results2.samples[s.sampleId][param.tw2.$id]["key"]);
159
- }
160
- group2names.push(n);
161
- }
162
- } else {
163
- group2names.push(n);
164
- }
165
- }
166
- }
167
- const sample_size1 = group1names.length;
168
- const sample_size2 = group2names.length;
169
- const alerts = validateGroups(sample_size1, sample_size2, group1names, group2names);
170
- if (param.preAnalysis) {
171
- const group1Name = param.samplelst.groups[0].name;
172
- const group2Name = param.samplelst.groups[1].name;
173
- return {
174
- data: {
175
- [group1Name]: sample_size1,
176
- [group2Name]: sample_size2,
177
- ...alerts.length ? { alert: alerts.join(" | ") } : {}
178
- }
179
- };
180
- }
181
- if (alerts.length) throw new Error(alerts.join(" | "));
182
- const cases_string = group2names.map((i) => i).join(",");
183
- const controls_string = group1names.map((i) => i).join(",");
184
- const expression_input = {
185
- case: cases_string,
186
- control: controls_string,
187
- data_type: "do_DE",
188
- input_file: q.file,
189
- cachedir: serverconfig.cachedir,
190
- min_count: param.min_count,
191
- min_total_count: param.min_total_count,
192
- cpm_cutoff: param.cpm_cutoff,
193
- storage_type: param.storage_type,
194
- DE_method: param.method,
195
- mds_cutoff: 1e4
196
- // If the dimensions of the read counts matrix is below this threshold, only then the mds image will be generated as its very compute intensive. Number of genes * Number of samples < mds_cutoff for mds generation
197
- };
198
- if (param.tw) {
199
- expression_input.conf1 = [...conf1_group2, ...conf1_group1];
200
- expression_input.conf1_mode = param.tw.q.mode;
201
- if (new Set(expression_input.conf1).size === 1) {
202
- throw new Error("Confounding variable 1 has only one value");
203
- }
204
- }
205
- if (param.tw2) {
206
- expression_input.conf2 = [...conf2_group2, ...conf2_group1];
207
- expression_input.conf2_mode = param.tw2.q.mode;
208
- if (new Set(expression_input.conf2).size === 1) {
209
- throw new Error("Confounding variable 2 has only one value");
210
- }
211
- }
212
- const sample_size_limit = 8;
213
- if (group1names.length <= sample_size_limit && group2names.length <= sample_size_limit || param.method == "edgeR" || param.method == "limma") {
214
- const time12 = (/* @__PURE__ */ new Date()).valueOf();
215
- const result2 = JSON.parse(await run_R("edge_newh5.R", JSON.stringify(expression_input)));
216
- mayLog("Time taken to run edgeR:", formatElapsedTime(Date.now() - time12));
217
- param.method = "edgeR";
218
- const ql_imagePath = path.join(serverconfig.cachedir, result2.edgeR_ql_image_name[0]);
219
- mayLog("ql_imagePath:", ql_imagePath);
220
- await readFileAndDelete(ql_imagePath, "ql_image", result2);
221
- if (result2.edgeR_mds_image_name) {
222
- const mds_imagePath = path.join(serverconfig.cachedir, result2.edgeR_mds_image_name[0]);
223
- mayLog("mds_imagePath:", mds_imagePath);
224
- await readFileAndDelete(mds_imagePath, "mds_image", result2);
225
- }
226
- const images = [result2.ql_image];
227
- if (result2.mds_image) images.push(result2.mds_image);
228
- const output = {
229
- data: result2.gene_data,
230
- sample_size2: result2.num_cases[0],
231
- sample_size1: result2.num_controls[0],
232
- method: param.method,
233
- images
234
- };
235
- if (result2.bcv && result2.bcv[0] !== null && result2.bcv[0] !== void 0) {
236
- output.bcv = result2.bcv[0];
237
- }
238
- return output;
239
- }
240
- const time1 = (/* @__PURE__ */ new Date()).valueOf();
241
- const result = JSON.parse(await run_rust("DEanalysis", JSON.stringify(expression_input)));
242
- mayLog("Time taken to run rust DE pipeline:", formatElapsedTime(Date.now() - time1));
243
- param.method = "wilcoxon";
244
- return { data: result, sample_size1, sample_size2, method: param.method };
245
- }
246
- function validateGroups(sample_size1, sample_size2, group1names, group2names) {
247
- const alerts = [];
248
- if (sample_size1 < 1) alerts.push("sample size of group1 < 1");
249
- if (sample_size2 < 1) alerts.push("sample size of group2 < 1");
250
- const commonnames = group1names.filter((x) => group2names.includes(x));
251
- if (commonnames.length) alerts.push(`Common elements found between both groups: ${commonnames.join(", ")}`);
252
- return alerts;
253
- }
254
- async function readFileAndDelete(file, key, response) {
255
- const plot = await fs.promises.readFile(file);
256
- const plotBuffer = Buffer.from(plot).toString("base64");
257
- const { width, height } = imageSize(file);
258
- const obj = {
259
- src: `data:image/png;base64,${plotBuffer}`,
260
- size: `${width}x${height}`,
261
- key
262
- };
263
- response[key] = obj;
264
- fs.unlink(file, (err) => {
265
- if (err) throw new Error(err.message || String(err));
266
- });
267
- }
268
61
  async function validate_query_rnaseqGeneCount(ds) {
269
62
  const q = ds.queries.rnaseqGeneCount;
270
63
  if (!q) return;
@@ -7,8 +7,15 @@ import serverconfig from "#src/serverconfig.js";
7
7
  import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
8
8
  import { mayLimitSamples } from "#src/mds3.filter.js";
9
9
  import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
10
- import { TermTypes, ISOFORM_EXPRESSION, PROTEOME_ABUNDANCE } from "#shared/terms.js";
11
- import { termType2label } from "#shared/terms.js";
10
+ import { getData } from "#src/termdb.matrix.js";
11
+ import {
12
+ GENE_EXPRESSION,
13
+ METABOLITE_INTENSITY,
14
+ NUMERIC_DICTIONARY_TERM,
15
+ termType2label,
16
+ ISOFORM_EXPRESSION,
17
+ PROTEOME_ABUNDANCE
18
+ } from "#shared/terms.js";
12
19
  import { formatElapsedTime } from "#shared/time.js";
13
20
  const api = {
14
21
  endpoint: "termdb/cluster",
@@ -34,8 +41,9 @@ function init({ genomes }) {
34
41
  if (!ds) throw "invalid dataset name";
35
42
  if (ds.label === "GDC" && !ds.__gdc?.doneCaching)
36
43
  throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
37
- if ([TermTypes.GENE_EXPRESSION, ISOFORM_EXPRESSION, TermTypes.METABOLITE_INTENSITY].includes(q.dataType)) {
38
- if (!ds.queries?.[q.dataType]) throw `no ${q.dataType} data on this dataset`;
44
+ if ([GENE_EXPRESSION, ISOFORM_EXPRESSION, METABOLITE_INTENSITY, NUMERIC_DICTIONARY_TERM].includes(q.dataType)) {
45
+ if (!ds.queries?.[q.dataType] && q.dataType !== NUMERIC_DICTIONARY_TERM)
46
+ throw `no ${q.dataType} data on this dataset`;
39
47
  if (!q.terms) throw `missing gene list`;
40
48
  if (!Array.isArray(q.terms)) throw `gene list is not an array`;
41
49
  if (q.terms.length < 3)
@@ -43,7 +51,7 @@ function init({ genomes }) {
43
51
  result = await getResult(q, ds);
44
52
  } else if (PROTEOME_ABUNDANCE == q.dataType) {
45
53
  const proteomeQuery = ds.queries?.proteome;
46
- if (!proteomeQuery?.get) throw `no ${TermTypes.PROTEOME_ABUNDANCE} data getter on this dataset`;
54
+ if (!proteomeQuery?.get) throw `no ${PROTEOME_ABUNDANCE} data getter on this dataset`;
47
55
  if (!q.terms) throw `missing gene list`;
48
56
  if (!Array.isArray(q.terms)) throw `gene list is not an array`;
49
57
  if (q.terms.length < 3)
@@ -64,13 +72,16 @@ function init({ genomes }) {
64
72
  }
65
73
  async function getResult(q, ds) {
66
74
  let _q = q;
67
- if (q.dataType == TermTypes.GENE_EXPRESSION) {
75
+ if (q.dataType == GENE_EXPRESSION) {
68
76
  _q = JSON.parse(JSON.stringify(q));
69
77
  _q.forClusteringAnalysis = true;
70
78
  _q.__abortSignal = q.__abortSignal;
71
79
  }
72
80
  let term2sample2value, byTermId, bySampleId, skippedSexChrGenes;
73
- if (q.dataType == PROTEOME_ABUNDANCE) {
81
+ if (q.dataType == NUMERIC_DICTIONARY_TERM) {
82
+ ;
83
+ ({ term2sample2value, byTermId, bySampleId } = await getNumericDictTermAnnotation(q, ds));
84
+ } else if (q.dataType == PROTEOME_ABUNDANCE) {
74
85
  ;
75
86
  ({ term2sample2value, byTermId, bySampleId, skippedSexChrGenes } = await ds.queries.proteome.get(_q));
76
87
  } else {
@@ -90,7 +101,7 @@ async function getResult(q, ds) {
90
101
  const removedHierClusterTerms = [];
91
102
  if (noValueTerms.length) {
92
103
  removedHierClusterTerms.push({
93
- text: `Skipped ${q.dataType == TermTypes.GENE_EXPRESSION ? "genes" : "items"} with no data`,
104
+ text: `Skipped ${q.dataType == GENE_EXPRESSION ? "genes" : "items"} with no data`,
94
105
  lst: noValueTerms
95
106
  });
96
107
  }
@@ -100,7 +111,7 @@ async function getResult(q, ds) {
100
111
  if (term2sample2value.size == 0) throw "no data";
101
112
  if (term2sample2value.size == 1) {
102
113
  const g = Array.from(term2sample2value.keys())[0];
103
- return { term: { gene: g, type: TermTypes.GENE_EXPRESSION }, data: term2sample2value.get(g) };
114
+ return { term: { gene: g, type: GENE_EXPRESSION }, data: term2sample2value.get(g) };
104
115
  }
105
116
  const t = Date.now();
106
117
  const clustering = await doClustering(term2sample2value, q, Object.keys(bySampleId).length);
@@ -109,6 +120,29 @@ async function getResult(q, ds) {
109
120
  if (removedHierClusterTerms.length) result.removedHierClusterTerms = removedHierClusterTerms;
110
121
  return result;
111
122
  }
123
/**
 * Fetch annotations for the requested terms through getData() and pivot the
 * per-sample result into a term -> {sampleKey: value} map.
 *
 * @param {object} q - request; reads q.terms, q.filter, q.filter0 and q.__protected__
 * @param {object} ds - dataset object, passed through to getData()
 * @returns {Promise<{term2sample2value: Map<string, object>, byTermId: *, bySampleId: *}>}
 *   the pivoted values plus data.refs.byTermId / data.refs.bySampleId from getData()
 * @throws whatever getData() reports in its `.error` property
 */
async function getNumericDictTermAnnotation(q, ds) {
  // TODO: figure out when term is not a termwrapper
  // Entries without a `.term` property are wrapped as continuous-mode term wrappers.
  const toTermWrapper = (entry) => (entry.term ? entry : { term: entry, q: { mode: "continuous" } });

  const data = await getData(
    {
      terms: q.terms.map(toTermWrapper),
      filter: q.filter,
      filter0: q.filter0,
      __protected__: q.__protected__
    },
    ds
  );
  if (data.error) throw data.error;

  const term2sample2value = /* @__PURE__ */ new Map();
  for (const sampleKey of Object.keys(data.samples)) {
    const annotations = data.samples[sampleKey];
    for (const termKey of Object.keys(annotations)) {
      // the "sample" property is not a term annotation
      if (termKey === "sample") continue;
      let sample2value = term2sample2value.get(termKey);
      if (!sample2value) {
        sample2value = {};
        term2sample2value.set(termKey, sample2value);
      }
      sample2value[sampleKey] = annotations[termKey].value;
    }
  }

  const { byTermId, bySampleId } = data.refs;
  return { term2sample2value, byTermId, bySampleId };
}
112
146
  async function doClustering(data, q, numCases = 1e3) {
113
147
  const sampleSet = /* @__PURE__ */ new Set();
114
148
  let firstTerm = true;
@@ -297,6 +331,7 @@ async function validateNative(q, ds) {
297
331
  const sampleId = ds.cohort.termdb.q.sampleName2id(sampleName);
298
332
  if (!sampleId) continue;
299
333
  if (limitSamples && !limitSamples.has(sampleId)) continue;
334
+ if (!Number.isFinite(samplesData[sampleName])) continue;
300
335
  s2v[sampleId] = samplesData[sampleName];
301
336
  }
302
337
  if (Object.keys(s2v).length) {
@@ -52,6 +52,7 @@ function make(q, req, res, ds, genome) {
52
52
  dataDownloadCatch: tdb.dataDownloadCatch,
53
53
  matrix: tdb.matrix,
54
54
  hierCluster: tdb.hierCluster,
55
+ numericDictTermCluster: tdb.numericDictTermCluster,
55
56
  mclass: tdb.mclass,
56
57
  alwaysRefillCategoricalTermValues: tdb.alwaysRefillCategoricalTermValues,
57
58
  isGeneSetTermdb: tdb.isGeneSetTermdb,
@@ -3,6 +3,7 @@ import { getData } from "../src/termdb.matrix.js";
3
3
  import { run_R } from "@sjcrh/proteinpaint-r";
4
4
  import { mayLog } from "#src/helpers.ts";
5
5
  import { formatElapsedTime } from "#shared";
6
+ import { renderVolcano } from "../src/renderVolcano.ts";
6
7
  const api = {
7
8
  endpoint: "termdb/diffMeth",
8
9
  methods: {
@@ -39,7 +40,7 @@ function init({ genomes }) {
39
40
  throw new Error(
40
41
  "Differential methylation analysis returned no data. Please verify sample selections and try again."
41
42
  );
42
- if (Array.isArray(results.data) && !results.data.length)
43
+ if ("totalRows" in results.data && results.data.totalRows === 0)
43
44
  throw new Error("No promoters passed filtering. Try relaxing group criteria or selecting more samples.");
44
45
  res.send(results);
45
46
  } catch (e) {
@@ -168,8 +169,9 @@ async function run_diffMeth(param, ds, term_results, term_results2) {
168
169
  const time1 = Date.now();
169
170
  const result = JSON.parse(await run_R("diffMeth.R", JSON.stringify(diffMethInput)));
170
171
  mayLog("Time taken to run diffMeth:", formatElapsedTime(Date.now() - time1));
172
+ const rendered = await renderVolcano(result.promoter_data, param.volcanoRender);
171
173
  const output = {
172
- data: result.promoter_data,
174
+ data: rendered,
173
175
  sample_size1,
174
176
  sample_size2
175
177
  };
@@ -51,9 +51,10 @@ function init({ genomes }) {
51
51
  __abortSignal: q.__abortSignal
52
52
  });
53
53
  const controlSampleIds = cohortData.controlSampleIds || /* @__PURE__ */ new Set();
54
+ const prior = assay.cohorts[cohortName].prior;
54
55
  for (const entry of cohortData.allEntries || []) {
55
56
  const s2v = entry.s2v;
56
- const stats = getCohortStats(s2v, controlSampleIds);
57
+ const stats = getCohortStats(s2v, controlSampleIds, prior);
57
58
  delete entry.s2v;
58
59
  entry.foldChange = stats.foldChange;
59
60
  entry.pValue = stats.pValue;
@@ -71,7 +72,7 @@ function init({ genomes }) {
71
72
  }
72
73
  };
73
74
  }
74
- function getCohortStats(allS2v, controlSampleIds) {
75
+ function getCohortStats(allS2v, controlSampleIds, prior) {
75
76
  if (!allS2v || typeof allS2v != "object") return { foldChange: null, pValue: null, testedN: 0, controlN: 0 };
76
77
  const controlValues = [];
77
78
  const testedValues = [];
@@ -84,7 +85,10 @@ function getCohortStats(allS2v, controlSampleIds) {
84
85
  const controlMean = controlValues?.length ? controlValues.reduce((sum, v) => sum + v, 0) / controlValues.length : null;
85
86
  const testedMean = testedValues?.length ? testedValues.reduce((sum, v) => sum + v, 0) / testedValues.length : null;
86
87
  const foldChange = testedMean != null && controlMean != null && Number.isFinite(testedMean) && Number.isFinite(controlMean) && controlMean !== 0 ? testedMean / controlMean : null;
87
- const pValue = getWelchPValue(testedValues, controlValues);
88
+ if (!Number.isFinite(prior?.d0) || prior.d0 <= 0 || !Number.isFinite(prior?.s0sq) || prior.s0sq <= 0) {
89
+ throw "prior with finite positive d0 and s0sq is required for moderated t-test";
90
+ }
91
+ const pValue = getModeratedPValue(testedValues, controlValues, prior);
88
92
  return {
89
93
  foldChange,
90
94
  pValue,
@@ -92,36 +96,38 @@ function getCohortStats(allS2v, controlSampleIds) {
92
96
  controlN: controlValues.length
93
97
  };
94
98
  }
95
- function getWelchPValue(a, b) {
99
/**
 * Two-sided p-value of a moderated two-sample t-test: the pooled residual
 * variance of the two groups is shrunk toward a prior variance before the
 * t-statistic is formed, and the prior degrees of freedom are added to the
 * residual degrees of freedom.
 *
 * @param {number[]} a - values of the first group
 * @param {number[]} b - values of the second group
 * @param {{d0: number, s0sq: number}} prior - prior degrees of freedom (d0) and
 *   prior variance (s0sq)
 * @returns {number|null} p-value clamped to [1e-300, 1]; null when either group
 *   has fewer than 2 values, when the standard error is not a finite number
 *   (NaN/Infinity in the inputs, or overflow), or when the tail probability
 *   cannot be computed
 */
function getModeratedPValue(a, b, prior) {
  const n1 = a.length;
  const n2 = b.length;
  if (n1 < 2 || n2 < 2) return null;

  const mean1 = a.reduce((s, v) => s + v, 0) / n1;
  const mean2 = b.reduce((s, v) => s + v, 0) / n2;

  // Sum of squared deviations from the group mean, per group.
  let ss1 = 0;
  for (const v of a) {
    const d = v - mean1;
    ss1 += d * d;
  }
  let ss2 = 0;
  for (const v of b) {
    const d = v - mean2;
    ss2 += d * d;
  }

  const dfResidual = n1 + n2 - 2;
  const pooledVar = (ss1 + ss2) / dfResidual;

  // Shrink the pooled variance toward the prior, weighting each by its degrees of freedom.
  const { d0, s0sq } = prior;
  const sTildeSq = (d0 * s0sq + dfResidual * pooledVar) / (d0 + dfResidual);
  const se = Math.sqrt(sTildeSq * (1 / n1 + 1 / n2));

  // A NaN standard error also fails `se > 0`; without this guard it would fall
  // into the zero-variance branch below and be reported as p = 1e-300.
  if (!Number.isFinite(se)) return null;

  if (!(se > 0)) {
    // Zero standard error: identical means are "no difference", anything else is
    // reported with the smallest p-value this function emits.
    if (mean1 === mean2) return 1;
    return 1e-300;
  }

  const t = (mean1 - mean2) / se;
  const df = d0 + dfResidual;
  if (!Number.isFinite(df) || df < 0.1) return null;
  const p = 2 * tCdfTail(Math.abs(t), df);
  if (!Number.isFinite(p)) return null;
  return Math.max(1e-300, Math.min(1, p));
}
116
- function sampleVariance(lst, mean) {
117
- if (lst.length < 2) return NaN;
118
- let sumsq = 0;
119
- for (const v of lst) {
120
- const d = v - mean;
121
- sumsq += d * d;
122
- }
123
- return sumsq / (lst.length - 1);
124
- }
125
131
  function tCdfTail(t, df) {
126
132
  const x = df / (df + t * t);
127
133
  return 0.5 * regularizedBetaIncomplete(df / 2, 0.5, x);
@@ -217,6 +223,8 @@ async function validate_query_proteome(ds) {
217
223
  if (!cohort.controlFilter)
218
224
  throw `Missing controlFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
219
225
  if (!cohort.caseFilter) throw `Missing caseFilter in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
226
+ if (!cohort.prior?.d0 || !cohort.prior?.s0sq)
227
+ throw `Missing prior.d0 and prior.s0sq in queries.proteome.assays.${assayName}.cohorts.${cohortName}`;
220
228
  }
221
229
  } else {
222
230
  throw `Invalid assay structure for "${assayName}". Must have .cohorts`;
@@ -25,7 +25,8 @@ function init({ genomes }) {
25
25
  if (!ds.queries?.singleCell?.DEgenes || !ds.queries.singleCell.DEgenes.get)
26
26
  throw new Error("DE genes not supported on this dataset.");
27
27
  result = await ds.queries.singleCell.DEgenes.get(q);
28
- if (!result || !result.data || !result?.data?.length) {
28
+ const isEmpty = !result || !result.data || (Array.isArray(result.data) ? result.data.length === 0 : !result.data.totalRows);
29
+ if (isEmpty) {
29
30
  result = {
30
31
  status: 404,
31
32
  error: !result ? "No data found." : "No differentially expressed genes found."
@@ -48,11 +48,11 @@ async function validate_query_singleCell(ds, genome) {
48
48
  const q = ds.queries.singleCell;
49
49
  if (!q) return;
50
50
  if (typeof q.samples != "object") throw new Error("singleCell.samples{} not object");
51
+ if (typeof q.data != "object") throw new Error("singleCell.data{} not object");
51
52
  if (typeof q.samples.get == "function") {
52
53
  } else {
53
- await validateSamplesNative(q.samples, q.data, ds);
54
+ await validateSamples(q, ds);
54
55
  }
55
- if (typeof q.data != "object") throw new Error("singleCell.data{} not object");
56
56
  if (q.data.src == "gdcapi") {
57
57
  gdc_validate_query_singleCell_data(ds, genome);
58
58
  } else if (q.data.src == "native") {
@@ -85,7 +85,8 @@ function validateImages(images) {
85
85
  if (!images.label) images.label = "Images";
86
86
  if (!images.fileName) throw new Error("images.fileName missing");
87
87
  }
88
- async function validateSamplesNative(S, D, ds) {
88
+ async function validateSamples(q, ds) {
89
+ const S = q.samples, D = q.data;
89
90
  const samples = /* @__PURE__ */ new Map();
90
91
  for (const plot of D.plots) {
91
92
  for (const fn of await fs.promises.readdir(path.join(serverconfig.tpmasterdir, plot.folder))) {
@@ -102,6 +103,8 @@ async function validateSamplesNative(S, D, ds) {
102
103
  }
103
104
  if (!plot.colorColumns || plot.colorColumns.length == 0) continue;
104
105
  }
106
+ if (samples.size == 0) throw new Error("no scrna samples found");
107
+ console.log(samples.size, "singleCell samples loaded from " + ds.label);
105
108
  if (S.sampleColumns) {
106
109
  for (const { termid } of S.sampleColumns) {
107
110
  const term = ds.cohort.termdb.q.termjsonByOneid(termid);
@@ -114,7 +117,13 @@ async function validateSamplesNative(S, D, ds) {
114
117
  }
115
118
  }
116
119
  S.get = () => {
117
- return { samples: [...samples.values()] };
120
+ const re = { samples: [...samples.values()] };
121
+ if (q.metaResults) {
122
+ re.metaResults = q.metaResults.map((i) => {
123
+ return { name: i.name };
124
+ });
125
+ }
126
+ return re;
118
127
  };
119
128
  }
120
129
  function validateDataNative(D, ds) {
@@ -126,6 +135,28 @@ function validateDataNative(D, ds) {
126
135
  }
127
136
  const file2Lines = {};
128
137
  D.get = async (q) => {
138
+ if (q.checkPlotAvailability) {
139
+ const plots2 = [];
140
+ for (const plot of D.plots) {
141
+ if (!q.plots.includes(plot.name)) continue;
142
+ const tsvfile = path.join(
143
+ serverconfig.tpmasterdir,
144
+ plot.folder,
145
+ (q.sample?.eID || q.sample?.sID) + (plot.fileSuffix || "")
146
+ );
147
+ try {
148
+ await file_is_readable(tsvfile);
149
+ plots2.push({
150
+ name: plot.name,
151
+ expCells: [],
152
+ // FIXME avoid breaking client but shouldn't be needed
153
+ noExpCells: []
154
+ });
155
+ } catch (_) {
156
+ }
157
+ }
158
+ return { plots: plots2 };
159
+ }
129
160
  const plots = [];
130
161
  let geneExpMap;
131
162
  if (ds.queries.singleCell.geneExpression && q.gene) {
@@ -138,7 +169,7 @@ function validateDataNative(D, ds) {
138
169
  const tsvfile = path.join(
139
170
  serverconfig.tpmasterdir,
140
171
  plot.folder,
141
- (q.sample?.eID || q.sample?.sID) + plot.fileSuffix
172
+ (q.sample?.eID || q.sample?.sID) + (plot.fileSuffix || "")
142
173
  );
143
174
  if (!file2Lines[tsvfile]) {
144
175
  await file_is_readable(tsvfile);