@sjcrh/proteinpaint-server 2.44.0 → 2.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dataset/clinvar.hg19.js +53 -52
  2. package/dataset/clinvar.hg38.js +74 -73
  3. package/dataset/clinvar.js +164 -47
  4. package/dataset/termdb.test.js +257 -0
  5. package/genome/CriGri.js +1859 -27
  6. package/genome/cgc.js +743 -7
  7. package/genome/danRer10.js +1108 -46
  8. package/genome/dm3.js +71 -44
  9. package/genome/dm6.js +1926 -45
  10. package/genome/galGal5.js +23522 -46
  11. package/genome/galGal6.js +512 -46
  12. package/genome/hg19.js +293 -198
  13. package/genome/hg38.js +472 -105
  14. package/genome/hg38.test.js +406 -40
  15. package/genome/hgvirus.js +45 -20
  16. package/genome/mm10.js +135 -67
  17. package/genome/mm9.js +116 -79
  18. package/genome/rn6.js +1002 -47
  19. package/package.json +31 -35
  20. package/routes/_template_.js +30 -0
  21. package/routes/burden.js +149 -0
  22. package/routes/dataset.js +266 -0
  23. package/routes/dsdata.js +127 -0
  24. package/routes/gdc.maf.js +120 -0
  25. package/routes/gdc.mafBuild.js +106 -0
  26. package/routes/gdc.topMutatedGenes.js +465 -0
  27. package/routes/gene2canonicalisoform.js +41 -0
  28. package/routes/genelookup.js +52 -0
  29. package/routes/genomes.js +144 -0
  30. package/routes/healthcheck.js +30 -0
  31. package/routes/hicdata.js +98 -0
  32. package/routes/hicstat.js +55 -0
  33. package/routes/isoformlst.js +57 -0
  34. package/routes/ntseq.js +43 -0
  35. package/routes/pdomain.js +61 -0
  36. package/routes/snp.js +107 -0
  37. package/routes/termdb.categories.js +209 -0
  38. package/routes/termdb.cluster.js +228 -0
  39. package/routes/termdb.cohort.summary.js +38 -0
  40. package/routes/termdb.cohorts.js +49 -0
  41. package/routes/termdb.config.js +201 -0
  42. package/routes/termdb.getdescrstats.js +102 -0
  43. package/routes/termdb.getnumericcategories.js +92 -0
  44. package/routes/termdb.getpercentile.js +108 -0
  45. package/routes/termdb.getrootterm.js +65 -0
  46. package/routes/termdb.gettermchildren.js +67 -0
  47. package/routes/termdb.singleSampleMutation.js +80 -0
  48. package/routes/termdb.singlecellData.js +46 -0
  49. package/routes/termdb.singlecellSamples.js +160 -0
  50. package/routes/termdb.termsbyids.js +59 -0
  51. package/routes/termdb.topVariablyExpressedGenes.js +171 -0
  52. package/routes/termdb.violin.js +77 -0
  53. package/src/app.js +41498 -0
  54. package/src/serverconfig.js +14 -8
  55. package/start.js +3 -3
  56. package/routes/README.md +0 -84
  57. package/routes/burden.ts +0 -143
  58. package/routes/gdc.maf.ts +0 -195
  59. package/routes/gdc.mafBuild.ts +0 -114
  60. package/routes/gdc.topMutatedGenes.ts +0 -586
  61. package/routes/genelookup.ts +0 -50
  62. package/routes/healthcheck.ts +0 -29
  63. package/routes/hicdata.ts +0 -111
  64. package/routes/hicstat.ts +0 -55
  65. package/routes/termdb.categories.ts +0 -245
  66. package/routes/termdb.cluster.ts +0 -248
  67. package/routes/termdb.getdescrstats.ts +0 -102
  68. package/routes/termdb.getnumericcategories.ts +0 -99
  69. package/routes/termdb.getpercentile.ts +0 -118
  70. package/routes/termdb.getrootterm.ts +0 -73
  71. package/routes/termdb.gettermchildren.ts +0 -82
  72. package/routes/termdb.singleSampleMutation.ts +0 -87
  73. package/routes/termdb.singlecellData.ts +0 -49
  74. package/routes/termdb.singlecellSamples.ts +0 -175
  75. package/routes/termdb.termsbyids.ts +0 -63
  76. package/routes/termdb.topVariablyExpressedGenes.ts +0 -214
  77. package/routes/termdb.violin.ts +0 -77
  78. package/server.js +0 -2
  79. package/server.js.map +0 -1
  80. package/shared/common.js +0 -1080
  81. package/shared/termdb.initbinconfig.js +0 -96
  82. package/shared/vcf.js +0 -629
@@ -0,0 +1,209 @@
1
+ import { getOrderedLabels } from "#src/termdb.barchart.js";
2
+ import { getData } from "#src/termdb.matrix.js";
3
/**
 * Descriptor for the `termdb/categories` route.
 *
 * `methods.get` carries the handler factory (`init`), the request/response
 * type identifiers and one example request; `methods.post` is declared as an
 * alternative for `get` and reuses the same handler factory.
 * NOTE(review): how the router consumes `typeId`, `examples` and
 * `alternativeFor` is not visible in this module — confirm against the loader.
 */
const api = {
  endpoint: "termdb/categories",
  methods: {
    get: {
      init,
      request: { typeId: "getcategoriesRequest" },
      response: { typeId: "getcategoriesResponse" },
      examples: [
        {
          request: {
            body: {
              genome: "hg38-test",
              dslabel: "TermdbTest",
              embedder: "localhost",
              getcategories: 1,
              tid: "diaggrp",
              term1_q: {
                isAtomic: true,
                hiddenValues: {},
                type: "values",
                groupsetting: { disabled: true },
                mode: "discrete",
              },
              filter: {
                type: "tvslst",
                in: true,
                join: "",
                lst: [
                  {
                    tag: "cohortFilter",
                    type: "tvs",
                    tvs: {
                      term: {
                        name: "Cohort",
                        type: "categorical",
                        values: { ABC: { label: "ABC" }, XYZ: { label: "XYZ" } },
                        id: "subcohort",
                        isleaf: false,
                        groupsetting: { disabled: true },
                      },
                      values: [{ key: "ABC", label: "ABC" }],
                    },
                  },
                ],
              },
            },
          },
          response: {
            header: { status: 200 },
          },
        },
      ],
    },
    post: {
      alternativeFor: "get",
      init,
    },
  },
};
66
/**
 * Build the request handler for the `termdb/categories` route.
 *
 * The handler resolves the genome, dataset and termdb named by the query,
 * then delegates to `trigger_getcategories`, which sends the response.
 * Any failure is reported to the client as `{ error }`; only real `Error`
 * objects carrying a stack are logged.
 *
 * @param {{ genomes: object }} arg - registry of genomes keyed by name
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    const q = req.query;
    try {
      const genome = genomes[q.genome];
      if (!genome) {
        throw "invalid genome name";
      }
      const dataset = genome.datasets[q.dslabel];
      if (!dataset) {
        throw "invalid dataset name";
      }
      const termdb = dataset.cohort.termdb;
      if (!termdb) {
        throw "invalid termdb object";
      }
      await trigger_getcategories(q, res, termdb, dataset, genome);
    } catch (err) {
      res.send({ error: err?.message || err });
      // Plain-string throws are expected validation failures and stay out of the log.
      const isRealError = err instanceof Error && err.stack;
      if (isRealError) {
        console.log(err);
      }
    }
  };
}
87
/**
 * Compute the categories of one term and send them as `{ lst, orderedLabels }`.
 *
 * For a `geneVariant` query the list holds one `{ dt, classes }` entry per
 * data type; for every other term it holds one `{ samplecount, key, label }`
 * entry per distinct key found in the sample data.
 *
 * @param {object} q - request query; `q.tid` is required
 * @param {object} res - response object exposing `send()`
 * @param {object} tdb - termdb object; `tdb.q.termjsonByOneid` resolves the term
 * @param {object} ds - dataset, passed through to `getData`
 * @param {object} genome - genome, passed through to `getData`
 * @throws {string} ".tid missing", or whatever `getData` reports in `data.error`
 */
async function trigger_getcategories(q, res, tdb, ds, genome) {
  if (!q.tid) {
    throw ".tid missing";
  }
  const isGeneVariant = q.type === "geneVariant";
  const term = isGeneVariant ? { name: q.tid, type: "geneVariant", isleaf: true } : tdb.q.termjsonByOneid(q.tid);
  const arg = {
    filter: q.filter,
    terms: isGeneVariant
      ? [{ term, q: { isAtomic: true } }]
      : [{ id: q.tid, term, q: q.term1_q || getDefaultQ(term, q) }],
    currentGeneNames: q.currentGeneNames, // optional, from mds3 mayAddGetCategoryArgs()
    rglst: q.rglst, // optional, from mds3 mayAddGetCategoryArgs()
  };
  const data = await getData(arg, ds, genome);
  if (data.error) {
    throw data.error;
  }

  const termRefs = data.refs?.byTermId?.[q.tid];
  const lst = isGeneVariant
    ? summarizeGeneVariantClasses(data.samples, q.tid, ds)
    : summarizeCategories(data.samples, q.tid, term, termRefs?.events);

  const orderedLabels = getOrderedLabels(term, termRefs?.bins || [], termRefs?.events, q.term1_q);
  if (orderedLabels.length) {
    lst.sort((a, b) => orderedLabels.indexOf(a.label) - orderedLabels.indexOf(b.label));
  }
  res.send({ lst, orderedLabels });
}

/**
 * Count, per data type (and per origin where the dataset declares `byOrigin`),
 * how many samples carry each mutation class. A sample is counted at most once
 * per class even if it has several matching values.
 *
 * @param {object} samples - sample id -> per-term data
 * @param {string} tid - term id (gene name) to read from each sample
 * @param {object} ds - dataset; `ds.assayAvailability.byDt` seeds the by-origin buckets
 * @returns {Array<{dt: *, classes: object}>} one entry per data type, in first-seen order
 */
function summarizeGeneVariantClasses(samples, tid, ds) {
  const dtClassMap = new Map();
  if (ds.assayAvailability?.byDt) {
    for (const [dtType, dtValue] of Object.entries(ds.assayAvailability.byDt)) {
      if (dtValue.byOrigin) {
        dtClassMap.set(Number.parseInt(dtType, 10), { byOrigin: { germline: {}, somatic: {} } });
      }
    }
  }

  const sampleCountedFor = new Set();
  for (const sampleData of Object.values(samples)) {
    // A sample without an entry for this term contributes nothing.
    const values = sampleData[tid]?.values;
    if (!values) {
      continue;
    }
    sampleCountedFor.clear();
    for (const value of values) {
      if (!dtClassMap.has(value.dt)) {
        dtClassMap.set(value.dt, {});
      }
      const dtClasses = dtClassMap.get(value.dt);
      const counts = dtClasses.byOrigin ? dtClasses.byOrigin[value.origin] : dtClasses;
      const countedKey = dtClasses.byOrigin
        ? `${value.dt} ${value.origin} ${value.class}`
        : `${value.dt} ${value.class}`;
      if (sampleCountedFor.has(countedKey)) {
        continue;
      }
      sampleCountedFor.add(countedKey);
      counts[value.class] = (counts[value.class] || 0) + 1;
    }
  }

  return [...dtClassMap].map(([dt, classes]) => ({ dt, classes }));
}

/**
 * Count samples per category key and attach a display label to each key.
 * The label comes from the matching event, else from `term.values`, else it
 * is the key itself.
 *
 * @param {object} samples - sample id -> per-term data
 * @param {string} tid - term id to read from each sample
 * @param {object} term - term definition (may carry `values`)
 * @param {Array<{event: *, label: string}>} [events] - optional event descriptors
 * @returns {Array<{samplecount: number, key: *, label: *}>} entries in first-seen key order
 */
function summarizeCategories(samples, tid, term, events) {
  const key2count = new Map();
  for (const sid in samples) {
    const v = samples[sid][tid];
    if (!v || !("key" in v)) {
      continue;
    }
    key2count.set(v.key, 1 + (key2count.get(v.key) || 0));
  }

  const lst = [];
  for (const [key, count] of key2count) {
    // `find` returns undefined when no event matches; `?.` lets the fallbacks apply.
    const eventLabel = events?.find((e) => e.event === key)?.label;
    lst.push({
      samplecount: count,
      key,
      label: eventLabel || term?.values?.[key]?.label || key,
    });
  }
  return lst;
}
181
/**
 * Produce the default term-query settings used when the request carries no
 * `term1_q`.
 *
 * @param {object} term - term definition; `term.type` selects the default
 * @param {object} q - request query; only read for `condition` terms
 * @returns {object} `{}` for categorical/survival/geneVariant terms,
 *   `term.bins.default` for integer/float terms, or the condition settings
 *   copied from `q`
 * @throws {string} "unknown term type" for any other `term.type`
 */
function getDefaultQ(term, q) {
  switch (term.type) {
    case "categorical":
    case "survival":
    case "geneVariant":
      return {};

    case "integer":
    case "float":
      return term.bins.default;

    case "condition": {
      // bar_by_children and value_by_computable_grade are taken from the request
      // only. The original notes say falling back to term.subconditions is on
      // hold until the term1_q pass-through bug is resolved, because the tvs
      // condition tests fail with it.
      const {
        mode,
        breaks,
        bar_by_grade,
        bar_by_children,
        value_by_max_grade,
        value_by_most_recent,
        value_by_computable_grade,
      } = q;
      return {
        mode,
        breaks,
        bar_by_grade,
        bar_by_children,
        value_by_max_grade,
        value_by_most_recent,
        value_by_computable_grade,
      };
    }

    default:
      throw "unknown term type";
  }
}
207
+ export {
208
+ api
209
+ };
@@ -0,0 +1,228 @@
1
+ import path from "path";
2
+ import fs from "fs";
3
+ import lines2R from "#src/lines2R.js";
4
+ import * as utils from "#src/utils.js";
5
+ import serverconfig from "#src/serverconfig.js";
6
+ import { gdc_validate_query_geneExpression } from "#src/mds3.gdc.js";
7
+ import { mayLimitSamples } from "#src/mds3.filter.js";
8
+ import { dtgeneexpression } from "#shared/common.js";
9
/**
 * Descriptor for the `termdb/cluster` route: a single `all` method entry
 * carrying the handler factory and the request/response type identifiers.
 * NOTE(review): the meaning of the `all` key is decided by the route loader,
 * which is not visible here — presumably "any HTTP method"; confirm.
 */
const api = {
  endpoint: "termdb/cluster",
  methods: {
    all: {
      init,
      request: { typeId: "TermdbClusterRequest" },
      response: { typeId: "TermdbClusterResponse" },
    },
  },
};
23
/**
 * Build the request handler for the `termdb/cluster` route.
 *
 * The handler validates genome, dataset, GDC caching state and data type,
 * then sends either the result of `getResult` or `{ status, error }`.
 *
 * @param {{ genomes: object }} arg - registry of genomes keyed by name
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    const q = req.query;
    let result;
    try {
      const genome = genomes[q.genome];
      if (!genome) {
        throw "invalid genome name";
      }
      const dataset = genome.datasets[q.dslabel];
      if (!dataset) {
        throw "invalid dataset name";
      }
      const gdcStillCaching = dataset.__gdc && !dataset.__gdc.doneCaching;
      if (gdcStillCaching) {
        throw "The server has not finished caching the case IDs: try again in ~2 minutes";
      }
      // Comparison kept loose as in the original; presumably q.dataType may
      // arrive as a string while the constant is numeric — confirm.
      if (q.dataType != dtgeneexpression) {
        throw "unknown q.dataType " + q.dataType;
      }
      if (!dataset.queries?.geneExpression) {
        throw "no geneExpression data on this dataset";
      }
      result = await getResult(q, dataset);
    } catch (err) {
      if (err.stack) {
        console.log(err.stack);
      }
      result = {
        status: err.status || 400,
        error: err.message || err,
      };
    }
    res.send(result);
  };
}
54
/**
 * Fetch gene expression values for the request and shape the response.
 *
 * @param {object} q - request query, forwarded to the dataset getter and to clustering
 * @param {object} ds - dataset exposing `queries.geneExpression.get`
 * @returns {Promise<object>} `{ gene, data }` when exactly one gene has data,
 *   otherwise `{ clustering, byTermId, bySampleId }`
 * @throws {string} "no data" when no gene has data
 */
async function getResult(q, ds) {
  const expression = await ds.queries.geneExpression.get(q);
  const { gene2sample2value, byTermId, bySampleId } = expression;
  const geneCount = gene2sample2value.size;

  if (geneCount == 0) {
    throw "no data";
  }

  if (geneCount == 1) {
    // A single gene cannot be clustered; return its values directly.
    const [gene] = gene2sample2value.keys();
    return { gene, data: gene2sample2value.get(gene) };
  }

  const startedAt = Date.now();
  const clustering = await doClustering(gene2sample2value, q);
  if (serverconfig.debugmode) {
    console.log("clustering done:", Date.now() - startedAt, "ms");
  }
  return { clustering, byTermId, bySampleId };
}
68
/**
 * Cluster a gene-by-sample matrix by running `utils/hclust.R` on a temporary
 * JSON input file, then return the z-scored matrix reordered to the row and
 * column order reported by the script.
 *
 * @param {Map<string, object>} data - gene name -> `{ sampleId: value }`
 * @param {object} q - request query; `q.clusterMethod` is passed to the script
 * @returns {Promise<{row: object, col: object, matrix: number[][]}>} dendrogram
 *   details for rows and columns plus the reordered matrix
 */
async function doClustering(data, q) {
  // Column names are taken from the first gene only; samples missing from a
  // later gene are filled with 0 below.
  // NOTE(review): samples present only in later genes are dropped — confirm
  // that all genes are expected to share one sample set.
  const sampleSet = new Set();
  for (const o of data.values()) {
    for (const s in o) {
      sampleSet.add(s);
    }
    break;
  }

  const inputData = {
    matrix: [],
    row_names: [], // genes
    col_names: [...sampleSet], // samples
    cluster_method: q.clusterMethod,
    plot_image: false, // When true causes cluster.rs to plot the image into a png file (EXPERIMENTAL)
  };
  for (const [gene, o] of data) {
    inputData.row_names.push(gene);
    const row = [];
    for (const s of inputData.col_names) {
      row.push(o[s] || 0);
    }
    inputData.matrix.push(getZscore(row));
  }

  const Rinputfile = path.join(serverconfig.cachedir, Math.random().toString() + ".json");
  await utils.write_file(Rinputfile, JSON.stringify(inputData));

  let Routput;
  try {
    Routput = JSON.parse(await lines2R(path.join(serverconfig.binpath, "utils/hclust.R"), [], [Rinputfile]));
  } finally {
    // Remove the temporary input even when the R run or JSON parsing fails,
    // so failed requests do not accumulate files in the cache directory.
    await fs.promises.unlink(Rinputfile);
  }

  const row_names_index = Routput.RowOrder.map((row) => inputData.row_names.indexOf(row.name));
  const col_names_index = Routput.ColOrder.map((col) => inputData.col_names.indexOf(col.name));
  const output_matrix = [];
  for (const rowI of row_names_index) {
    const newRow = [];
    for (const colI of col_names_index) {
      newRow.push(inputData.matrix[rowI][colI]);
    }
    output_matrix.push(newRow);
  }

  return {
    row: {
      merge: Routput.RowMerge,
      height: Routput.RowHeight,
      order: Routput.RowOrder,
      inputOrder: inputData.row_names,
    },
    col: {
      merge: Routput.ColumnMerge,
      height: Routput.ColumnHeight,
      order: Routput.ColOrder,
      inputOrder: inputData.col_names,
    },
    matrix: output_matrix,
  };
}
123
/**
 * Convert a list of numbers to z-scores using the population standard
 * deviation (divides by the list length).
 *
 * @param {number[]} l - input values
 * @returns {number[]} a new array of z-scores, or the input array itself
 *   (same reference) when the standard deviation is 0
 */
function getZscore(l) {
  const count = l.length;

  let total = 0;
  l.forEach((v) => {
    total += v;
  });
  const mean = total / count;

  let squaredDeviations = 0;
  l.forEach((v) => {
    squaredDeviations += Math.pow(v - mean, 2);
  });
  const sd = Math.sqrt(squaredDeviations / count);

  // Constant rows cannot be scaled; hand them back unchanged.
  if (sd === 0) {
    return l;
  }
  return l.map((v) => (v - mean) / sd);
}
131
/**
 * Validate the dataset's `queries.geneExpression` entry, dispatching on its
 * `src` field. Does nothing when the dataset has no such query.
 *
 * @param {object} ds - dataset; `ds.queries.geneExpression` is inspected
 * @param {object} genome - genome object, forwarded to the source-specific validator
 * @throws {string} "unknown queries.geneExpression.src" for any other `src`
 */
async function validate_query_geneExpression(ds, genome) {
  const query = ds.queries.geneExpression;
  if (!query) {
    return;
  }

  switch (query.src) {
    case "gdcapi":
      // Called without await, as in the original.
      gdc_validate_query_geneExpression(ds, genome);
      return;
    case "native":
      await validateNative(query, ds, genome);
      return;
    default:
      throw "unknown queries.geneExpression.src";
  }
}
145
/**
 * Validate a native (tabix file backed) gene expression query and attach its
 * `get()` method.
 *
 * Side effects on `q`: `file` is made absolute under `serverconfig.tpmasterdir`,
 * `nochr` is detected from the file, `samples` is rebuilt from the header line
 * (columns 5+ mapped to sample ids), and `get` is installed.
 *
 * @param {object} q - the `queries.geneExpression` entry to validate and extend
 * @param {object} ds - dataset; used for sample name/id lookups and sample filtering
 * @param {object} genome - genome; used for the nochr check and gene model lookups
 * @throws {string} when the header line is missing, malformed, or names an unknown sample
 */
async function validateNative(q, ds, genome) {
  if (!q.file.startsWith(serverconfig.tpmasterdir)) {
    q.file = path.join(serverconfig.tpmasterdir, q.file);
  }
  await utils.validate_tabixfile(q.file);
  q.nochr = await utils.tabix_is_nochr(q.file, null, genome);
  q.samples = [];

  {
    const lines = await utils.get_header_tabix(q.file);
    if (!lines[0]) {
      throw "header line missing from " + q.file;
    }
    // NOTE(review): the field delimiter is reproduced exactly as it appears in
    // this file (a single space). Tabix data is usually tab-separated — confirm
    // the literals here and in the line callback were not whitespace-normalized.
    const l = lines[0].split(" ");
    if (l.slice(0, 4).join(" ") != "#chr start stop gene") {
      throw "header line has wrong content for columns 1-4";
    }
    for (let i = 4; i < l.length; i++) {
      const id = ds.cohort.termdb.q.sampleName2id(l[i]);
      if (id == void 0) {
        throw "queries.geneExpression: unknown sample from header: " + l[i];
      }
      q.samples.push(id);
    }
    console.log(q.samples.length, "samples from geneExpression of", ds.label);
  }

  /**
   * Load expression values for the requested genes.
   *
   * @param {object} param - request parameters; `param.genes` lists `{ gene, chr?, start?, stop? }`
   * @returns {Promise<{gene2sample2value: Map, byTermId: object, bySampleId: object}>}
   */
  q.get = async (param) => {
    const limitSamples = await mayLimitSamples(param, q.samples, ds);
    if (limitSamples?.size == 0) {
      // Same container type as the normal path, so callers can treat both alike.
      return { gene2sample2value: new Map(), byTermId: {}, bySampleId: {} };
    }

    const bySampleId = {};
    const samples = q.samples || [];
    for (const sid of limitSamples || samples) {
      bySampleId[sid] = { label: ds.cohort.termdb.q.id2sampleName(sid) };
    }

    const gene2sample2value = new Map();
    for (const g of param.genes) {
      if (!g.gene) {
        continue;
      }
      if (!g.chr) {
        // Fill in missing coordinates from the gene database.
        const lst = genome.genedb.getjsonbyname.all(g.gene);
        if (lst.length == 0) {
          continue;
        }
        // Prefer the default gene model; fall back to the first one when none is flagged.
        const j = JSON.parse(lst.find((i) => i.isdefault)?.genemodel || lst[0].genemodel);
        g.start = j.start;
        g.stop = j.stop;
        g.chr = j.chr;
      }

      const s2v = {};
      await utils.get_lines_bigfile({
        args: [q.file, (q.nochr ? g.chr?.replace("chr", "") : g.chr) + ":" + g.start + "-" + g.stop],
        callback: (line) => {
          const l = line.split(" ");
          // The region query can return overlapping genes; keep only the requested one.
          if (l[3].toLowerCase() != g.gene.toLowerCase()) {
            return;
          }
          for (let i = 4; i < l.length; i++) {
            const sampleId = samples[i - 4];
            if (limitSamples && !limitSamples.has(sampleId)) {
              continue;
            }
            if (!l[i]) {
              continue;
            }
            const v = Number(l[i]);
            if (Number.isNaN(v)) {
              throw "exp value not number";
            }
            s2v[sampleId] = v;
          }
        },
      });
      if (Object.keys(s2v).length) {
        gene2sample2value.set(g.gene, s2v);
      }
    }

    const byTermId = {};
    return { gene2sample2value, byTermId, bySampleId };
  };
}
225
+ export {
226
+ api,
227
+ validate_query_geneExpression
228
+ };
@@ -0,0 +1,38 @@
1
+ import { get_ds_tdb } from "#src/termdb.js";
2
+ import { mayCopyFromCookie } from "#src/utils.js";
3
/**
 * Descriptor for the `termdb/cohort/summary` route: a `get` method entry with
 * the handler factory and untyped (`"any"`) request/response identifiers.
 */
const api = {
  endpoint: "termdb/cohort/summary",
  methods: {
    get: {
      init,
      request: { typeId: "any" },
      response: { typeId: "any" },
    },
  },
};
17
/**
 * Build the request handler for the `termdb/cohort/summary` route.
 *
 * The handler sends `{ count }` with the sample count of `q.cohort`, or
 * `{ error }` on failure. Every failure is logged (stack when available).
 *
 * @param {{ genomes: object }} arg - registry of genomes keyed by name
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    const q = req.query;
    mayCopyFromCookie(q, req.cookies);
    try {
      const genome = genomes[q.genome];
      if (!genome) {
        throw "invalid genome";
      }
      const [ds] = get_ds_tdb(genome, q);
      const count = ds.cohort.termdb.q.getcohortsamplecount(q.cohort);
      res.send({ count });
    } catch (err) {
      res.send({ error: err.message || err });
      console.log(err.stack ? err.stack : err);
    }
  };
}
36
+ export {
37
+ api
38
+ };
@@ -0,0 +1,49 @@
1
+ import { get_ds_tdb } from "#src/termdb.js";
2
+ import { mayCopyFromCookie } from "#src/utils.js";
3
/**
 * Descriptor for the `termdb/cohorts` route: a `get` method entry with the
 * handler factory and untyped (`"any"`) request/response identifiers.
 */
const api = {
  endpoint: "termdb/cohorts",
  methods: {
    get: {
      init,
      request: { typeId: "any" },
      response: { typeId: "any" },
    },
  },
};
17
/**
 * Build the request handler for the `termdb/cohorts` route.
 *
 * The handler sends the object returned by `getCohortsData` for the resolved
 * dataset, or `{ error }` on failure. Every failure is logged (stack when
 * available).
 *
 * @param {{ genomes: object }} arg - registry of genomes keyed by name
 * @returns {(req: object, res: object) => Promise<void>} request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    const q = req.query;
    mayCopyFromCookie(q, req.cookies);
    try {
      const genome = genomes[q.genome];
      if (!genome) {
        throw "invalid genome";
      }
      const [ds] = get_ds_tdb(genome, q);
      res.send(getCohortsData(ds));
    } catch (err) {
      res.send({ error: err.message || err });
      console.log(err.stack ? err.stack : err);
    }
  };
}
37
/**
 * Read the cohort tables from the dataset's database connection.
 *
 * Runs three queries in order: all rows of `features`, the rows of `cohorts`
 * that are referenced by `cohort_features` (largest `sample_count` first),
 * and all rows of `cohort_features`.
 *
 * @param {object} ds - dataset exposing `cohort.db.connection.prepare(sql).all()`
 * @returns {{cohorts: Array, features: Array, cfeatures: Array}} query results
 */
function getCohortsData(ds) {
  const { connection } = ds.cohort.db;
  const selectAll = (sql) => connection.prepare(sql).all();

  const features = selectAll("select * from features");
  const cohorts = selectAll(
    `select * from cohorts where cohort in (select distinct(cohort) from cohort_features)
order by sample_count desc`
  );
  const cfeatures = selectAll("select * from cohort_features");

  return { cohorts, features, cfeatures };
}
46
+ export {
47
+ api,
48
+ getCohortsData
49
+ };