@sjcrh/proteinpaint-server 2.60.0 → 2.62.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.60.0",
3
+ "version": "2.62.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -61,7 +61,7 @@
61
61
  },
62
62
  "dependencies": {
63
63
  "@sjcrh/augen": "2.46.0",
64
- "@sjcrh/proteinpaint-rust": "2.60.0",
64
+ "@sjcrh/proteinpaint-rust": "2.61.1",
65
65
  "better-sqlite3": "^9.4.1",
66
66
  "body-parser": "^1.15.2",
67
67
  "canvas": "~2.11.2",
@@ -102,6 +102,7 @@
102
102
  "src/checkReadingFrame.js",
103
103
  "src/bedj.parseBed.js",
104
104
  "utils/*.R",
105
+ "utils/*.py",
105
106
  "LICENSE/*"
106
107
  ],
107
108
  "bugs": {
@@ -0,0 +1,64 @@
1
+ import path from "path";
2
+ import serverconfig from "#src/serverconfig.js";
3
+ import { illegalpath } from "#src/utils.js";
4
// Route prefix for the DZ image endpoint.
const routePath = "dzimages";

/**
 * Route descriptor for the DZ image endpoint.
 *
 * The endpoint takes the sample id as a path parameter. GET and POST share
 * the same `init` handler factory; POST is declared as an alternative for GET.
 */
const api = {
  endpoint: routePath + "/:sampleId",
  methods: {
    get: {
      init,
      request: { typeId: "any" },
      response: { typeId: "any" }
    },
    post: { alternativeFor: "get", init }
  }
};
23
/**
 * Builds the request handler that serves one DZ image file for a sample.
 *
 * The handler reads `genome`, `dslabel` and `file` from `req.query` and the
 * sample id from `req.params.sampleId`, resolves the file below
 * `<tpmasterdir>/<imageBySampleFolder>/<sampleId>/` and streams it with
 * `res.sendFile`. Validation failures are reported as
 * `{ status: "error", error }`; a failed file transfer is reported as a 404.
 *
 * @param {{ genomes: object }} arg - `genomes` maps genome name to genome object
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  const allowedExtensions = [".dzi", ".jpeg", ".png"];

  // True when `child` is strictly below `parent` once both paths are normalized.
  const isInside = (parent, child) => {
    const relative = path.relative(parent, child);
    if (relative === "" || relative === "..") {
      return false;
    }
    return !relative.startsWith(`..${path.sep}`) && !path.isAbsolute(relative);
  };

  return async (req, res) => {
    try {
      const g = genomes[req.query.genome];
      if (!g) {
        throw "invalid genome name";
      }
      const ds = g.datasets[req.query.dslabel];
      if (!ds) {
        throw "invalid dataset name";
      }
      const imageFolder = ds.queries?.DZImages?.imageBySampleFolder;
      if (!imageFolder) {
        throw "DZImages is not supported on this dataset";
      }

      const sampleId = req.params.sampleId;
      if (!sampleId || typeof sampleId !== "string") {
        throw "invalid sampleId";
      }
      const file = req.query.file;
      if (typeof file !== "string" || file === "") {
        throw "invalid file";
      }
      if (illegalpath(file)) {
        throw `illegalpath filepath`;
      }
      const extension = path.extname(path.basename(file));
      if (!allowedExtensions.includes(extension)) {
        throw `Invalid file extension. Allowed extensions are ${allowedExtensions.join(", ")}`;
      }

      // The sample id comes from the URL and is user controlled: make sure
      // neither it nor the file can climb out of the configured image folder.
      const baseDir = path.join(`${serverconfig.tpmasterdir}/${imageFolder}`);
      const sampleDir = path.join(baseDir, sampleId);
      if (!isInside(baseDir, sampleDir)) {
        throw "invalid sampleId";
      }
      const imagePath = path.join(sampleDir, file);
      if (!isInside(sampleDir, imagePath)) {
        throw `illegalpath filepath`;
      }

      res.sendFile(imagePath, (err) => {
        // Once headers are out (e.g. aborted transfer) a second response
        // cannot be written, so only answer 404 when nothing was sent yet.
        if (err && !res.headersSent) {
          res.status(404).send("Image not found");
        }
      });
    } catch (e) {
      console.log(e);
      res.send({
        status: "error",
        // Error instances serialize to {}, so prefer their message.
        error: e?.error || e?.message || e
      });
    }
  };
}
62
+ export {
63
+ api
64
+ };
@@ -0,0 +1,50 @@
1
+ import fs from "fs";
2
+ import path from "path";
3
+ import serverconfig from "#src/serverconfig.js";
4
/**
 * Route descriptor for the endpoint that lists the DZ images of a sample.
 *
 * GET and POST share the same `init` handler factory; POST is declared as an
 * alternative for GET.
 */
const api = {
  endpoint: "sampledzimages",
  methods: {
    get: {
      init,
      request: { typeId: "GetSampleDZImagesRequest" },
      response: { typeId: "GetSampleDZImagesResponse" }
    },
    post: { alternativeFor: "get", init }
  }
};
22
/**
 * Builds the request handler that lists the `.dzi` files of one sample.
 *
 * The handler reads `genome`, `dslabel` and `sample_id` from `req.query`,
 * lists `<tpmasterdir>/<imageBySampleFolder>/<sample_id>` through
 * `getDZImages` and answers `{ sampleDZImages }`. Any failure is logged and
 * answered with a 404 "Sample images not found".
 *
 * @param {{ genomes: object }} arg - `genomes` maps genome name to genome object
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    try {
      const g = genomes[req.query.genome];
      if (!g) {
        throw "invalid genome name";
      }
      const ds = g.datasets[req.query.dslabel];
      if (!ds) {
        throw "invalid dataset name";
      }
      const imageFolder = ds.queries?.DZImages?.imageBySampleFolder;
      if (!imageFolder) {
        throw "DZImages is not supported on this dataset";
      }
      const sampleId = req.query.sample_id;
      if (typeof sampleId !== "string" || sampleId === "") {
        throw "invalid sample_id";
      }

      // sample_id is user controlled: refuse anything that does not resolve
      // to a directory strictly below the configured image folder.
      const baseDir = path.join(`${serverconfig.tpmasterdir}/${imageFolder}`);
      const sampleDZImagesPath = path.join(baseDir, sampleId);
      const relative = path.relative(baseDir, sampleDZImagesPath);
      const escapesBase =
        relative === "" ||
        relative === ".." ||
        relative.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relative);
      if (escapesBase) {
        throw "invalid sample_id";
      }

      const sampleDZImages = getDZImages(sampleDZImagesPath);
      res.send({ sampleDZImages });
    } catch (e) {
      console.log(e);
      res.status(404).send("Sample images not found");
    }
  };
}
44
/**
 * Lists the entries of a directory whose extension is exactly ".dzi".
 *
 * @param {string} sampleImagesPath - directory to read (synchronously)
 * @returns {string[]} matching entry names, in the order returned by readdirSync
 */
function getDZImages(sampleImagesPath) {
  const dziFiles = [];
  for (const entry of fs.readdirSync(sampleImagesPath)) {
    if (path.extname(entry) === ".dzi") {
      dziFiles.push(entry);
    }
  }
  return dziFiles;
}
48
+ export {
49
+ api
50
+ };
@@ -0,0 +1,129 @@
1
+ import path from "path";
2
+ import { run_rust } from "@sjcrh/proteinpaint-rust";
3
+ import { get_ds_tdb } from "../src/termdb.js";
4
+ import run_R from "../src/run_R.js";
5
+ import serverconfig from "../src/serverconfig.js";
6
/**
 * Route descriptor for the differential-expression analysis endpoint.
 *
 * A single `init` handler factory is registered under the `all` method key.
 */
const api = {
  endpoint: "DEanalysis",
  methods: {
    all: {
      init,
      request: { typeId: "DERequest" },
      // response validation will be combined with the type checker later
      response: { typeId: "DEResponse" }
    }
  }
};
22
/**
 * Builds the request handler for the DE analysis route.
 *
 * The handler looks up the genome named by `req.query.genome`, takes the
 * first element returned by `get_ds_tdb` as the dataset, runs `run_DE` with
 * the query and sends whatever it resolves to. On failure it sends
 * `{ status: "error", error }` and logs the exception when it is an Error
 * carrying a stack.
 *
 * @param {{ genomes: object }} arg - `genomes` maps genome name to genome object
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    try {
      const query = req.query;
      const genome = genomes[query.genome];
      if (!genome) {
        throw "invalid genome";
      }
      const [dataset] = get_ds_tdb(genome, query);
      res.send(await run_DE(query, dataset));
    } catch (err) {
      const payload = { status: "error", error: err.message || err };
      res.send(payload);
      const hasStack = err instanceof Error && err.stack;
      if (hasStack) {
        console.log(err);
      }
    }
  };
}
39
/**
 * Runs differential-expression analysis between two sample groups.
 *
 * edgeR (via `utils/edge.R`) is used when both groups contain at most 8
 * usable samples or when `param.method` is "edgeR"; otherwise the rust
 * "DEanalysis" pipeline is used and the method is reported as "wilcoxon".
 *
 * @param {object} param - request parameters; reads `samplelst.groups`
 *   (exactly two groups, each with a non-empty `values` array of
 *   `{ sampleId }`), `min_count`, `min_total_count` and `method`.
 *   `param.method` is overwritten with the method actually used.
 * @param {object} ds - dataset; reads `ds.queries.rnaseqGeneCount` and
 *   `ds.cohort.termdb.q.id2sampleName`.
 * @returns {Promise<{data: any, sample_size1: number, sample_size2: number, method: string} | undefined>}
 *   undefined when the dataset has no `rnaseqGeneCount` query.
 * @throws {string} on invalid groups, unusable samples, or rust output that
 *   carries no `adjusted_p_values:` line.
 */
async function run_DE(param, ds) {
  const groups = param.samplelst?.groups;
  if (groups?.length != 2) {
    throw ".samplelst.groups.length!=2";
  }
  // `undefined < 1` is false, so a missing `values` must be rejected explicitly.
  if (!(groups[0]?.values?.length >= 1)) {
    throw "samplelst.groups[0].values.length<1";
  }
  if (!(groups[1]?.values?.length >= 1)) {
    throw "samplelst.groups[1].values.length<1";
  }
  const q = ds.queries.rnaseqGeneCount;
  if (!q) {
    return;
  }
  if (!q.file) {
    throw "unknown data type for rnaseqGeneCount";
  }

  const group1names = deSampleNames(groups[0].values, ds, q);
  const group2names = deSampleNames(groups[1].values, ds, q);
  const sample_size1 = group1names.length;
  const sample_size2 = group2names.length;
  if (sample_size1 < 1) {
    throw "sample size of group1 < 1";
  }
  if (sample_size2 < 1) {
    throw "sample size of group2 < 1";
  }

  const expression_input = {
    case: group1names.join(","),
    control: group2names.join(","),
    input_file: q.file,
    min_count: param.min_count,
    min_total_count: param.min_total_count
  };

  const sample_size_limit = 8;
  const bothGroupsSmall = sample_size1 <= sample_size_limit && sample_size2 <= sample_size_limit;
  let result;
  let method;
  if (bothGroupsSmall || param.method == "edgeR") {
    const time1 = Date.now();
    result = JSON.parse(
      await run_R(path.join(serverconfig.binpath, "utils", "edge.R"), JSON.stringify(expression_input))
    );
    console.log("Time taken to run edgeR:", Date.now() - time1, "ms");
    method = "edgeR";
  } else {
    // Both an explicit "wilcoxon" and any other/unset method use the rust pipeline.
    result = await deRunRust(expression_input);
    method = "wilcoxon";
  }
  // Kept for callers that read the effective method back from the request object.
  param.method = method;
  return { data: result, sample_size1, sample_size2, method };
}

/** Maps group entries to sample names, keeping only samples present in q.allSampleSet. */
function deSampleNames(values, ds, q) {
  const names = [];
  for (const s of values) {
    if (!Number.isInteger(s.sampleId)) {
      continue;
    }
    const n = ds.cohort.termdb.q.id2sampleName(s.sampleId);
    if (n && q.allSampleSet.has(n)) {
      names.push(n);
    }
  }
  return names;
}

/** Runs the rust "DEanalysis" pipeline and parses its `adjusted_p_values:` line. */
async function deRunRust(expression_input) {
  const marker = "adjusted_p_values:";
  const time1 = Date.now();
  const rust_output = await run_rust("DEanalysis", JSON.stringify(expression_input));
  const time2 = Date.now();
  let result;
  for (const line of rust_output.split("\n")) {
    if (line.startsWith(marker)) {
      result = JSON.parse(line.replace(marker, ""));
    }
  }
  console.log("Time taken to run rust DE pipeline:", time2 - time1, "ms");
  if (result === undefined) {
    throw "no adjusted_p_values found in rust DEanalysis output";
  }
  return result;
}
127
+ export {
128
+ api
129
+ };
@@ -4,9 +4,9 @@ import * as utils from "#src/utils.js";
4
4
  import serverconfig from "#src/serverconfig.js";
5
5
  import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
6
6
  import { mayLimitSamples } from "#src/mds3.filter.js";
7
- import { dtgeneexpression } from "#shared/common.js";
8
7
  import { clusterMethodLst, distanceMethodLst } from "#shared/clustering.js";
9
8
  import { getResult as getResultGene } from "#src/gene.js";
9
+ import { TermTypes } from "#shared/terms.js";
10
10
  const api = {
11
11
  endpoint: "termdb/cluster",
12
12
  methods: {
@@ -34,9 +34,9 @@ function init({ genomes }) {
34
34
  throw "invalid dataset name";
35
35
  if (ds.__gdc && !ds.__gdc.doneCaching)
36
36
  throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
37
- if (q.dataType == dtgeneexpression) {
38
- if (!ds.queries?.geneExpression)
39
- throw "no geneExpression data on this dataset";
37
+ if (q.dataType == TermTypes.GENE_EXPRESSION || q.dataType == TermTypes.METABOLITE_INTENSITY) {
38
+ if (!ds.queries?.[q.dataType])
39
+ throw `no ${q.dataType} data on this dataset`;
40
40
  result = await getResult(q, ds);
41
41
  } else {
42
42
  throw "unknown q.dataType " + q.dataType;
@@ -53,15 +53,16 @@ function init({ genomes }) {
53
53
  };
54
54
  }
55
55
  async function getResult(q, ds) {
56
- const { gene2sample2value, byTermId, bySampleId } = await ds.queries.geneExpression.get(q);
57
- if (gene2sample2value.size == 0)
56
+ const type = q.dataType;
57
+ const { term2sample2value, byTermId, bySampleId } = await ds.queries[type].get(q);
58
+ if (term2sample2value.size == 0)
58
59
  throw "no data";
59
- if (gene2sample2value.size == 1) {
60
- const g = Array.from(gene2sample2value.keys())[0];
61
- return { gene: g, data: gene2sample2value.get(g) };
60
+ if (term2sample2value.size == 1) {
61
+ const g = Array.from(term2sample2value.keys())[0];
62
+ return { term: { gene: g, type: TermTypes.GENE_EXPRESSION }, data: term2sample2value.get(g) };
62
63
  }
63
64
  const t = Date.now();
64
- const clustering = await doClustering(gene2sample2value, q);
65
+ const clustering = await doClustering(term2sample2value, q);
65
66
  if (serverconfig.debugmode)
66
67
  console.log("clustering done:", Date.now() - t, "ms");
67
68
  return { clustering, byTermId, bySampleId };
@@ -173,7 +174,7 @@ async function validateNative(q, ds, genome) {
173
174
  q.get = async (param) => {
174
175
  const limitSamples = await mayLimitSamples(param, q.samples, ds);
175
176
  if (limitSamples?.size == 0) {
176
- return { gene2sample2value: /* @__PURE__ */ new Set(), byTermId: {}, bySampleId: {} };
177
+ return { term2sample2value: /* @__PURE__ */ new Set(), byTermId: {}, bySampleId: {} };
177
178
  }
178
179
  const bySampleId = {};
179
180
  const samples = q.samples || [];
@@ -186,28 +187,32 @@ async function validateNative(q, ds, genome) {
186
187
  bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
187
188
  }
188
189
  }
189
- const gene2sample2value = /* @__PURE__ */ new Map();
190
- for (const g of param.genes) {
191
- if (!g.gene)
190
+ const term2sample2value = /* @__PURE__ */ new Map();
191
+ for (const g of param.terms) {
192
+ const geneTerm = g;
193
+ if (!geneTerm.gene)
192
194
  continue;
193
- if (!g.chr) {
194
- const re = getResultGene(genome, { input: g.gene, deep: 1 });
195
+ if (!geneTerm.chr) {
196
+ const re = getResultGene(genome, { input: geneTerm.gene, deep: 1 });
195
197
  if (!re.gmlst || re.gmlst.length == 0) {
196
- console.warn("unknown gene:" + g.gene);
198
+ console.warn("unknown gene:" + geneTerm.gene);
197
199
  continue;
198
200
  }
199
201
  const i = re.gmlst.find((i2) => i2.isdefault) || re.gmlst[0];
200
- g.start = i.start;
201
- g.stop = i.stop;
202
- g.chr = i.chr;
202
+ geneTerm.start = i.start;
203
+ geneTerm.stop = i.stop;
204
+ geneTerm.chr = i.chr;
203
205
  }
204
206
  const s2v = {};
205
207
  await utils.get_lines_bigfile({
206
- args: [q.file, (q.nochr ? g.chr?.replace("chr", "") : g.chr) + ":" + g.start + "-" + g.stop],
208
+ args: [
209
+ q.file,
210
+ (q.nochr ? geneTerm.chr?.replace("chr", "") : geneTerm.chr) + ":" + geneTerm.start + "-" + geneTerm.stop
211
+ ],
207
212
  // must do g.chr?.replace to avoid tsc error
208
213
  callback: (line) => {
209
214
  const l = line.split(" ");
210
- if (l[3].toLowerCase() != g.gene.toLowerCase())
215
+ if (l[3].toLowerCase() != geneTerm.gene.toLowerCase())
211
216
  return;
212
217
  for (let i = 4; i < l.length; i++) {
213
218
  const sampleId = samples[i - 4];
@@ -223,12 +228,12 @@ async function validateNative(q, ds, genome) {
223
228
  }
224
229
  });
225
230
  if (Object.keys(s2v).length)
226
- gene2sample2value.set(g.gene, s2v);
231
+ term2sample2value.set(geneTerm.gene, s2v);
227
232
  }
228
233
  const byTermId = {};
229
- if (gene2sample2value.size == 0)
230
- throw "no data available for the input " + param.genes?.map((g) => g.gene).join(", ");
231
- return { gene2sample2value, byTermId, bySampleId };
234
+ if (term2sample2value.size == 0)
235
+ throw "no data available for the input " + param.terms?.map((g) => g.gene).join(", ");
236
+ return { term2sample2value, byTermId, bySampleId };
232
237
  };
233
238
  }
234
239
  export {
@@ -175,6 +175,11 @@ function addGenomicQueries(c, ds, genome) {
175
175
  q2.NIdata[k] = JSON.parse(JSON.stringify(q.NIdata[k]));
176
176
  }
177
177
  }
178
+ if (q.DZImages) {
179
+ q2.DZImages = {
180
+ type: q.DZImages.type
181
+ };
182
+ }
178
183
  if (q.singleSampleGbtk) {
179
184
  q2.singleSampleGbtk = {};
180
185
  for (const k in q.singleSampleGbtk) {
@@ -0,0 +1,106 @@
1
+ import { run_rust } from "@sjcrh/proteinpaint-rust";
2
+ import { get_samples } from "#src/termdb.sql.js";
3
+ import { TermTypes } from "#shared/terms.js";
4
/**
 * Route descriptor for the endpoint that returns the top terms of a term type.
 *
 * A single `init` handler factory is registered under the `all` method key.
 */
const api = {
  endpoint: "termdb/getTopTermsByType",
  methods: {
    all: {
      init,
      request: { typeId: "TermdbTopTermsByTypeRequest" },
      response: { typeId: "TermdbTopTermsByTypeResponse" }
    }
  }
};
18
/**
 * Builds the request handler for the top-terms-by-type route.
 *
 * The handler reads `genome`, `dslabel` and `type` from `req.query`, calls
 * `ds.queries[type].getTopTerms(query)` and sends `{ terms }`. Any failure is
 * sent as `{ status: "error", error }`.
 *
 * @param {{ genomes: object }} arg - `genomes` maps genome name to genome object
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    try {
      const q = req.query;
      const genome = genomes[q.genome];
      if (!genome) {
        throw "invalid genome";
      }
      const ds = genome.datasets?.[q.dslabel];
      if (!ds) {
        throw "invalid dslabel";
      }
      // `type` is user supplied: require an entry that really exposes
      // getTopTerms, so a missing `queries` object or an inherited key such as
      // "constructor" yields the regular "not supported" error.
      const typeQuery = ds.queries?.[q.type];
      if (typeof typeQuery?.getTopTerms !== "function") {
        throw "not supported on dataset";
      }
      const terms = await typeQuery.getTopTerms(q);
      res.send({ terms });
    } catch (e) {
      res.send({ status: "error", error: e.message || e });
    }
  };
}
39
/**
 * Attaches a `getTopTerms` implementation to every supported term-type query
 * present on the dataset, dispatching on the query's `src`.
 *
 * Currently the only type considered is `TermTypes.METABOLITE_INTENSITY`.
 * Datasets without a query of that type (or without `queries`) are left
 * untouched.
 *
 * @param {object} ds - dataset; reads `ds.queries[type].src`
 * @param {object} genome - passed through to the gdc validator
 * @throws {string} when a query declares a `src` other than "gdcapi" or "native"
 */
function validate_query_getTopTermsByType(ds, genome) {
  const types = [TermTypes.METABOLITE_INTENSITY];
  for (const type of types) {
    const q = ds.queries?.[type];
    if (!q) {
      continue;
    }
    if (q.src == "gdcapi") {
      gdcValidateQuery(ds, genome, type);
    } else if (q.src == "native") {
      nativeValidateQuery(ds, type);
    } else {
      // Name the query that is actually misconfigured.
      throw `unknown ${type}.src`;
    }
  }
}
55
/**
 * Installs `getTopTerms` on a native (file-backed) query of the given type.
 *
 * The installed function collects the sample names to analyse: all samples
 * of the query, or, when `q.filter` is set, the filtered samples that are also
 * part of the query. It then delegates to `computeTopTerms` with the query's file.
 *
 * @param {object} ds - dataset; reads `ds.queries[type].samples`, `.file` and
 *   `ds.cohort.termdb.q.id2sampleName`
 * @param {string} type - key of the query in `ds.queries`
 */
function nativeValidateQuery(ds, type) {
  ds.queries[type].getTopTerms = async (q) => {
    const typeQuery = ds.queries[type];
    // The filter branch has always compared `samples` entries against plain
    // ids, so entries are ids; `{ id }` objects are tolerated as well so that
    // both branches agree on one representation.
    const availableIds = typeQuery.samples.map((s) => (s !== null && typeof s === "object" ? s.id : s));

    let sampleIds = availableIds;
    if (q.filter) {
      const available = new Set(availableIds);
      const sidlst = await get_samples(q.filter, ds);
      sampleIds = sidlst.map((i) => i.id).filter((id) => available.has(id));
    }

    const samples = sampleIds.map((id) => {
      const n = ds.cohort.termdb.q.id2sampleName(id);
      if (!n) {
        throw "sample id cannot convert to string name";
      }
      return n;
    });
    return computeTopTerms(typeQuery.file, samples, type);
  };
}
81
/**
 * Runs the rust "computeTopTerms" pipeline and converts its result to terms.
 *
 * The rust output is scanned line by line for an `output_json:` marker; the
 * JSON after the marker is expected to be an array of objects carrying a
 * `metabolite` property. If several lines match, the last one wins.
 *
 * @param {string} file - input file handed to rust as `input_file`
 * @param {string[]} samples - sample names, sent comma-joined
 * @param {string} type - term type stamped on every returned term
 * @returns {Promise<Array<{name: any, type: string}>>}
 * @throws {string} when the rust output contains no `output_json:` array
 */
async function computeTopTerms(file, samples, type) {
  const input_json = {
    input_file: file,
    samples: samples.join(","),
    param: "var"
  };
  const rust_result = await run_rust("computeTopTerms", JSON.stringify(input_json));

  const marker = "output_json:";
  let output_json;
  for (const line of rust_result.split("\n")) {
    // Match on the full marker (with colon) so the text that is stripped is
    // exactly the text that was detected.
    const at = line.indexOf(marker);
    if (at !== -1) {
      output_json = JSON.parse(line.slice(at + marker.length));
    }
  }
  if (!Array.isArray(output_json)) {
    throw "computeTopTerms: no output_json array found in rust output";
  }
  return output_json.map((i) => ({ name: i.metabolite, type }));
}
98
/**
 * Installs a placeholder `getTopTerms` on a gdc-backed query of the given type.
 *
 * The installed function ignores its argument and resolves to an empty list.
 *
 * @param {object} ds - dataset whose `ds.queries[type]` receives the function
 * @param {object} genome - unused
 * @param {string} type - key of the query in `ds.queries`
 */
function gdcValidateQuery(ds, genome, type) {
  const typeQuery = ds.queries[type];
  typeQuery.getTopTerms = async (q) => [];
}
103
+ export {
104
+ api,
105
+ validate_query_getTopTermsByType
106
+ };