@sjcrh/proteinpaint-server 2.190.2 → 2.191.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -4
- package/src/app.js +10233 -9619
- package/routes/correlationVolcano.js +0 -112
- package/routes/dataset.js +0 -233
- package/routes/dsdata.js +0 -99
- package/routes/gdc.maf.js +0 -85
- package/routes/gdc.mafBuild.js +0 -115
- package/routes/genesetEnrichment.js +0 -226
- package/routes/grin2.js +0 -541
- package/routes/termdb.DE.js +0 -176
- package/routes/termdb.categories.js +0 -123
- package/routes/termdb.chat.js +0 -203
- package/routes/termdb.cluster.js +0 -456
- package/routes/termdb.config.js +0 -347
|
@@ -1,112 +0,0 @@
|
|
|
1
|
-
import { getData } from "../src/termdb.matrix.js";
|
|
2
|
-
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
3
|
-
import { mayLog } from "#src/helpers.ts";
|
|
4
|
-
import { getStdDev } from "./termdb.descrstats.ts";
|
|
5
|
-
// Minimum number of paired values a variable needs; compute() skips variables whose v1/v2 arrays are shorter than this.
const minArrayLength = 3;
|
|
6
|
-
/**
 * Create the request handler for the correlation volcano route.
 *
 * The handler looks up the genome and dataset named by `req.query.genome` and
 * `req.query.dslabel`, runs compute() and sends its result. Any failure is sent
 * back as `{ error }`; Error instances are additionally logged to stderr.
 *
 * @param {{genomes: object}} arg - genome objects keyed by genome name
 * @returns {(req: object, res: object) => Promise<void>} async request handler
 */
function init({ genomes }) {
  return async function handleCorrelationVolcano(req, res) {
    const query = req.query;
    try {
      const genomeObj = genomes[query.genome];
      if (!genomeObj) throw "invalid genome name";

      const dataset = genomeObj.datasets?.[query.dslabel];
      if (!dataset) throw "invalid ds";

      res.send(await compute(query, dataset));
    } catch (err) {
      // thrown values may be plain strings, so fall back to the value itself
      res.send({ error: err?.message || err });
      if (err instanceof Error && err.stack) console.error(err);
    }
  };
}
|
|
22
|
-
/**
 * Correlate one feature term against a list of variable terms.
 *
 * Sample values for all terms are loaded with getData(). For every variable,
 * the samples where both the feature value and the variable value are finite
 * numbers are paired up. Variables with fewer than `minArrayLength` pairs, or
 * with a standard deviation of 0 on either side, are reported in
 * `skippedVariables`; the remaining ones are sent to the "corr.R" script.
 *
 * @param {object} q - request query; reads featureTw, variableTwLst, filter,
 *   filter0, correlationMethod (defaults to "pearson") and __protected__
 * @param {object} ds - dataset; reads ds.cohort.correlationVolcano.feature.termTypes
 * @returns {Promise<{skippedVariables: Array<{tw$id: string}>, variableItems: object[]}>}
 * @throws a string message on invalid input, or `data.error` as returned by getData()
 */
async function compute(q, ds) {
  // Validate the whole request before querying any data, so that malformed
  // input always fails with a clear message regardless of the sample content.
  if (!q.featureTw?.$id) throw "featureTw.$id missing";
  if (!ds.cohort.correlationVolcano.feature.termTypes.includes(q.featureTw.term?.type))
    throw "unsupported featureTw.term.type";
  if (!Array.isArray(q.variableTwLst)) throw "variableTwLst[] not array";
  for (const tw of q.variableTwLst) {
    if (!tw?.$id) throw "variableTwLst[].$id missing";
  }

  const data = await getData(
    {
      filter: q.filter,
      filter0: q.filter0,
      terms: [q.featureTw, ...q.variableTwLst],
      __protected__: q.__protected__
    },
    ds
  );
  if (data.error) throw data.error;

  // variable $id -> paired value arrays (v1: feature values, v2: variable values)
  const vtid2array = new Map();
  for (const tw of q.variableTwLst) {
    vtid2array.set(tw.$id, { id: tw.$id, v1: [], v2: [] });
  }

  for (const sid in data.samples) {
    const sample = data.samples[sid];
    const featureValue = sample[q.featureTw.$id]?.value;
    if (!Number.isFinite(featureValue)) continue;
    for (const tw of q.variableTwLst) {
      const variableValue = sample[tw.$id]?.value;
      if (!Number.isFinite(variableValue)) continue;
      const pairs = vtid2array.get(tw.$id);
      pairs.v1.push(featureValue);
      pairs.v2.push(variableValue);
    }
  }

  const acceptedVariables = [];
  const skippedVariables = [];
  for (const [tid, v] of vtid2array) {
    const tooFew = v.v1.length < minArrayLength || v.v2.length < minArrayLength;
    // the length check comes first so getStdDev() is not called on short arrays
    if (tooFew || getStdDev(v.v1) == 0 || getStdDev(v.v2) == 0) {
      skippedVariables.push({ tw$id: tid });
      continue;
    }
    acceptedVariables.push(v);
  }

  const result = { skippedVariables, variableItems: [] };
  if (!acceptedVariables.length) return result;

  const input = {
    method: q.correlationMethod || "pearson",
    terms: acceptedVariables
  };
  const time1 = Date.now();
  const terms = JSON.parse(await run_R("corr.R", JSON.stringify(input)));
  mayLog("Time taken to run correlation analysis:", Date.now() - time1);

  for (const t of terms) {
    result.variableItems.push({
      tw$id: t.id,
      // sample size is taken from the R output rather than recomputed here
      sampleSize: t.sample_size,
      correlation: t.correlation,
      original_pvalue: t.original_p_value,
      adjusted_pvalue: t.adjusted_p_value
    });
  }
  return result;
}
|
|
83
|
-
/**
 * Validate the optional `ds.cohort.correlationVolcano` setting of a dataset.
 *
 * Returns silently when the setting is absent. Otherwise checks that
 * - `feature.termTypes` is an array containing only "geneExpression" and/or
 *   "ssGSEA", each backed by the matching entry in `ds.queries`;
 * - `variables.type` is "dictionaryTerm" and every id in `variables.termIds`
 *   resolves, via `ds.cohort.termdb.q.termjsonByOneid`, to an integer or float term.
 *
 * @param {object} ds - dataset object
 * @returns {void}
 * @throws a string describing the first problem found
 */
function validate_correlationVolcano(ds) {
  const cv = ds.cohort.correlationVolcano;
  if (!cv) return;

  // typeof null is "object", so null must be rejected explicitly
  if (!cv.feature || typeof cv.feature != "object") throw "cv.feature not obj";
  if (!Array.isArray(cv.feature.termTypes)) throw "cv.feature.termTypes[] not array";
  for (const t of cv.feature.termTypes) {
    if (t == "geneExpression") {
      if (!ds.queries?.geneExpression) throw "geneExpression cv.feature is not supported";
    } else if (t == "ssGSEA") {
      if (!ds.queries?.ssGSEA) throw "ssGSEA cv.feature is not supported";
    } else {
      throw "unknown cv.feature.termType";
    }
  }

  if (!cv.variables || typeof cv.variables != "object") throw "cv.variables not obj";
  if (cv.variables.type != "dictionaryTerm") throw "unknown cv.variables.type";
  if (!Array.isArray(cv.variables.termIds)) throw "cv.variables.termIds not array when type=dictionaryTerm";
  for (const id of cv.variables.termIds) {
    const t = ds.cohort.termdb.q.termjsonByOneid(id);
    if (!t) throw "cv.variables.termIds: unknown id: " + id;
    if (t.type != "integer" && t.type != "float") throw "cv.variables.termIds: not integer/float: " + id;
  }
}
|
|
109
|
-
export {
|
|
110
|
-
init,
|
|
111
|
-
validate_correlationVolcano
|
|
112
|
-
};
|
package/routes/dataset.js
DELETED
|
@@ -1,233 +0,0 @@
|
|
|
1
|
-
import * as mds3_init from "#src/mds3.init.js";
|
|
2
|
-
import * as common from "#shared/common.js";
|
|
3
|
-
/**
 * Create the request handler that returns a client-safe copy of a dataset.
 *
 * The dataset is looked up in `genomes[req.query.genome].datasets` by
 * `req.query.dsname`, compared case-insensitively. Depending on the dataset
 * flags the copy is made by mds3_init.client_copy() (isMds3), mds_clientcopy()
 * (isMds) or copy_legacyDataset(). The copy is sent as `{ ds }`; any failure is
 * sent as `{ error }`.
 *
 * @param {{genomes: object}} arg - genome objects keyed by genome name
 * @returns {(req: object, res: object) => any} request handler
 */
function init({ genomes }) {
  return function (req, res) {
    try {
      const q = req.query;
      const genome = genomes[q.genome];
      if (!genome) throw "unknown genome";
      if (!genome.datasets) throw "genomeobj.datasets{} missing";
      // a missing or repeated (array-valued) dsname would otherwise surface as a raw TypeError
      if (typeof q.dsname != "string") throw "invalid dsname";

      const wanted = q.dsname.toLowerCase();
      let ds;
      for (const k in genome.datasets) {
        if (k.toLowerCase() == wanted) {
          ds = genome.datasets[k];
          break;
        }
      }
      if (!ds) throw "invalid dsname";

      const copy = ds.isMds3 ? mds3_init.client_copy(ds) : ds.isMds ? mds_clientcopy(ds) : copy_legacyDataset(ds);
      return res.send({ ds: copy });
    } catch (e) {
      // thrown values may be plain strings (or nullish), so read .message defensively
      res.send({ error: e?.message || e });
    }
  };
}
|
|
25
|
-
/**
 * Build the subset of an "mds" dataset object that is sent to the client.
 *
 * Only whitelisted top-level attributes, selected cohort settings and a reduced
 * description of each query are copied. Queries that are neither `istrack` nor
 * `isgenenumeric` are left out.
 *
 * @param {object} ds - server-side dataset object
 * @returns {object} new object holding the client-visible settings
 */
function mds_clientcopy(ds) {
  const { cohort, queries } = ds;

  const ds2 = {
    isMds: true,
    noHandleOnClient: ds.noHandleOnClient,
    label: ds.label,
    version: ds.version,
    annotationsampleset2matrix: ds.annotationsampleset2matrix,
    mutationAttribute: ds.mutationAttribute,
    locusAttribute: ds.locusAttribute,
    alleleAttribute: ds.alleleAttribute,
    // these are quick fixes and should be deleted later
    hide_genotypedownload: ds.hide_genotypedownload,
    hide_phewas: ds.hide_phewas,
    sample2bam: ds.sample2bam
  };

  if (queries) ds2.queries = {};
  // the next flags only tell the client that the feature exists
  if (ds.singlesamplemutationjson) ds2.singlesamplemutationjson = 1;
  if (ds.gene2mutcount) {
    ds2.gene2mutcount = true;
    ds2.mutCountType = ds.gene2mutcount.mutationTypes;
  }
  if (ds.assayAvailability) ds2.assayAvailability = 1;

  if (cohort?.sampleAttribute) {
    // drop attributes flagged as clientnoshow
    const source = cohort.sampleAttribute.attributes;
    const visible = {};
    for (const key in source) {
      if (source[key].clientnoshow) continue;
      visible[key] = source[key];
    }
    ds2.sampleAttribute = { attributes: visible };
  }

  if (cohort) {
    if (cohort.termdb) {
      ds2.termdb = { selectCohort: cohort.termdb.selectCohort };
    }
    if (cohort.attributes?.defaulthidden) {
      ds2.cohortHiddenAttr = cohort.attributes.defaulthidden;
    }
    if (cohort.survivalplot) {
      const plots = [];
      for (const key in cohort.survivalplot.plots) {
        const { name, timelabel } = cohort.survivalplot.plots[key];
        plots.push({ key, name, timelabel });
      }
      ds2.survivalplot = { samplegroupattrlst: cohort.survivalplot.samplegroupattrlst, plots };
    }
    if (cohort.mutation_signature) {
      const sets = {};
      for (const key in cohort.mutation_signature.sets) {
        const { name, signatures } = cohort.mutation_signature.sets[key];
        sets[key] = { name, signatures };
      }
      ds2.mutation_signature = { sets };
    }
  }

  for (const key in queries) {
    const q = queries[key];
    const clientquery = {
      // revealed to client
      name: q.name,
      // hide track not ready to show on client
      hideforthemoment: q.hideforthemoment
    };
    if (q.istrack) {
      describeTrack(q, clientquery);
    } else if (q.isgenenumeric) {
      clientquery.isgenenumeric = true;
      clientquery.datatype = q.datatype;
      clientquery.no_ase = q.no_ase;
    } else {
      continue;
    }
    ds2.queries[key] = clientquery;
  }
  return ds2;

  /** Copy the client-visible settings of a track query `q` onto `out`. */
  function describeTrack(q, out) {
    out.istrack = true;
    out.type = q.type;
    out.isfull = q.isfull;
    if (q.nochr != null) out.nochr = q.nochr;

    // settings that are copied only when set to a truthy value
    const optionalKeys = [
      "infoFilter",
      "readcountCutoff",
      "valueLabel",
      "valueCutoff",
      "bplengthUpperLimit",
      "segmeanValueCutoff",
      "lohLengthUpperLimit"
    ];
    for (const name of optionalKeys) {
      if (q[name]) out[name] = q[name];
    }

    if (q.type != common.tkt.mdssvcnv) return;

    if (q.groupsamplebyattr) out.groupsamplebyattr = q.groupsamplebyattr;
    // settings that are always copied for this track type, set or not
    const alwaysKeys = [
      "multihidelabel_fusion",
      "multihidelabel_sv",
      "multihidelabel_vcf",
      "showfullmode",
      "legend_vorigin",
      "no_loh"
    ];
    for (const name of alwaysKeys) {
      out[name] = q[name];
    }
    if (q.expressionrank_querykey) out.checkexpressionrank = describeExpressionRank(q.expressionrank_querykey);
    if (q.vcf_querykey) out.checkvcf = describeVcf(q.vcf_querykey);
  }

  /** Summarize the query that `querykey` refers to, as used for expression rank checking. */
  function describeExpressionRank(querykey) {
    const rankQuery = queries[querykey];
    const summary = { querykey, datatype: rankQuery.datatype };
    const grouping = rankQuery.boxplotbysamplegroup;
    if (grouping && grouping.additionals) {
      const labels = [];
      // all attribute labels are merged into a single comma-joined entry
      if (grouping.attributes) labels.push(grouping.attributes.map((a) => a.label).join(", "));
      for (const extra of grouping.additionals) labels.push(extra.label);
      summary.boxplotgroupers = labels;
    }
    return summary;
  }

  /** Summarize the vcf query that `querykey` refers to: its info plus the merged FORMAT fields of all tracks. */
  function describeVcf(querykey) {
    const vcfQuery = queries[querykey];
    const summary = { querykey, info: vcfQuery.info, format: {} };
    for (const tk of vcfQuery.tracks) {
      if (tk.format) Object.assign(summary.format, tk.format);
    }
    return summary;
  }
}
|
|
182
|
-
/**
 * Build the client copy of a legacy (neither mds nor mds3) dataset.
 *
 * Copies a fixed list of top-level attributes by reference, shallow-copies
 * each entry of `snvindel_attributes` (and each element of an entry's `lst`
 * array), and collects the `vcf` objects of all queries into `id2vcf`, keyed by
 * `vcf.vcfid`.
 *
 * @param {object} ds - server-side dataset object
 * @returns {object} new object holding the client-visible settings
 */
function copy_legacyDataset(ds) {
  const ds2 = {
    noHandleOnClient: ds.noHandleOnClient,
    sampleselectable: ds.sampleselectable,
    label: ds.label,
    dsinfo: ds.dsinfo,
    stratify: ds.stratify,
    cohort: ds.cohort,
    vcfinfofilter: ds.vcfinfofilter,
    info2table: ds.info2table,
    info2singletable: ds.info2singletable,
    url4variant: ds.url4variant,
    itemlabelname: ds.itemlabelname
  };

  if (ds.snvindel_attributes) {
    ds2.snvindel_attributes = ds.snvindel_attributes.map((at) => {
      const rep = { ...at };
      // copy the nested list one level deep so entries are not shared with the server object
      if (Array.isArray(at.lst)) rep.lst = at.lst.map((e) => ({ ...e }));
      return rep;
    });
  }
  if (ds.snvindel_legend) {
    ds2.snvindel_legend = ds.snvindel_legend;
  }

  const vcfinfo = {};
  let hasvcf = false;
  // tolerate a dataset without queries instead of failing with "not iterable"
  for (const q of ds.queries || []) {
    if (!q.vcf) continue;
    hasvcf = true;
    vcfinfo[q.vcf.vcfid] = q.vcf;
  }
  if (hasvcf) {
    ds2.id2vcf = vcfinfo;
  }
  return ds2;
}
|
|
231
|
-
export {
|
|
232
|
-
init
|
|
233
|
-
};
|
package/routes/dsdata.js
DELETED
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
import path from "path";
|
|
2
|
-
import { spawn } from "child_process";
|
|
3
|
-
import serverconfig from "#src/serverconfig.js";
|
|
4
|
-
import * as common from "#shared/common.js";
|
|
5
|
-
/**
 * Create the request handler that loads data from every query of a legacy dataset.
 *
 * For the dataset named by `req.query.genome` / `req.query.dsname`, each entry
 * of `ds.queries` is handled in order:
 * - skipped when excluded by the `expressiononly` / `noexpression` request
 *   flags, or when it carries `dsblocktracklst`;
 * - loaded with handle_dsdata_vcf() when it has `vcffile`;
 * - loaded with handle_dsdata_makequery() when it has `makequery`;
 * - anything else is an error.
 * The collected results are sent as `{ data }`; failures as `{ error }`.
 *
 * @param {{genomes: object}} arg - genome objects keyed by genome name
 * @returns {(req: object, res: object) => Promise<void>} async request handler
 */
function init({ genomes }) {
  return async function handle_dsdata(req, res) {
    try {
      const params = req.query;
      if (!genomes[params.genome]) throw "invalid genome";
      if (!params.dsname) throw ".dsname missing";
      const dataset = genomes[params.genome].datasets[params.dsname];
      if (!dataset) throw "invalid dsname";

      const data = [];
      for (const query of dataset.queries) {
        const excluded =
          (params.expressiononly && !query.isgeneexpression) ||
          (params.noexpression && query.isgeneexpression) ||
          query.dsblocktracklst;
        if (excluded) continue;

        if (query.vcffile) {
          data.push(await handle_dsdata_vcf(query, req));
        } else if (query.makequery) {
          data.push(handle_dsdata_makequery(dataset, query, req, genomes));
        } else {
          throw "unknow type from one of ds.queries[]";
        }
      }
      res.send({ data });
    } catch (err) {
      if (err.stack) console.log(err.stack);
      res.send({ error: err.message || err });
    }
  };
}
|
|
43
|
-
/**
 * Run the SQL statement produced by `query.makequery()` and package the rows.
 *
 * Returns undefined (no data) when the requested isoform matches the genome's
 * `genomicNameRegexp`, or when makequery() yields no SQL string. Rows are passed
 * through `query.tidy` when that function is defined.
 *
 * For gene expression queries the result also carries `isgeneexpression` and
 * `config`. When the dataset has a junction track in any `dsblocktracklst`, it
 * is attached as `config.dsjunctiontk` (the last one wins).
 *
 * @param {object} ds - dataset; uses ds.newconn (prepare/all) and ds.queries
 * @param {object} query - one entry of ds.queries
 * @param {object} req - request; reads req.query
 * @param {object} genomes - genome objects keyed by genome name
 * @returns {{lst: object[], isgeneexpression?: boolean, config?: object}|undefined}
 */
function handle_dsdata_makequery(ds, query, req, genomes) {
  if (req.query.isoform) {
    if (genomes[req.query.genome].genomicNameRegexp.test(req.query.isoform)) return;
  }
  const [sqlstr, values] = query.makequery(req.query);
  if (!sqlstr) return;

  const rows = ds.newconn.prepare(sqlstr).all(values);
  const lst = query.tidy ? rows.map((row) => query.tidy(row)) : rows;

  const result = { lst };
  if (!query.isgeneexpression) return result;

  result.isgeneexpression = true;

  let junctionTk;
  for (const q2 of ds.queries) {
    if (!q2.dsblocktracklst) continue;
    for (const tk of q2.dsblocktracklst) {
      if (tk.type == common.tkt.junction) junctionTk = tk;
    }
  }
  // Attach the junction track to a copy of the config: writing it onto
  // query.config would modify the server-side dataset on every request, and
  // would crash when the query has no config object.
  result.config = junctionTk === undefined ? query.config : { ...query.config, dsjunctiontk: junctionTk };
  return result;
}
|
|
76
|
-
/**
 * Query a region of a VCF file by running the configured tabix executable.
 *
 * The file path is `query.vcffile` under `serverconfig.tpmasterdir`; the region
 * comes from `req.query.range` ({chr, start, stop}). When `query.vcf.nochr` is
 * set, the first "chr" in the chromosome name is removed.
 *
 * @param {object} query - dataset query; reads vcffile, vcf.nochr and vcf.vcfid
 * @param {object} req - request; reads req.query.range
 * @returns {Promise<{lines: string[], vcfid: any}>} lines printed by tabix
 *   (empty array when there is no output) and the id of the vcf
 * @throws rejects with the string "error querying vcf file" when the process
 *   cannot be started or writes anything to stderr
 */
function handle_dsdata_vcf(query, req) {
  const { chr, start, stop } = req.query.range;
  const chrName = query.vcf.nochr ? chr.replace("chr", "") : chr;
  const par = [path.join(serverconfig.tpmasterdir, query.vcffile), chrName + ":" + start + "-" + stop];

  return new Promise((resolve, reject) => {
    const ps = spawn(serverconfig.tabix, par);
    const out = [];
    const err = [];
    // Without this listener a failure to start the process (e.g. missing
    // executable) is an unhandled 'error' event and brings down the server.
    ps.on("error", () => reject("error querying vcf file"));
    ps.stdout.on("data", (chunk) => out.push(chunk));
    ps.stderr.on("data", (chunk) => err.push(chunk));
    ps.on("close", () => {
      // concatenate the raw chunks before decoding, so that a multi-byte
      // character split across two chunks is decoded correctly
      if (Buffer.concat(err).toString().trim() != "") {
        reject("error querying vcf file");
        return;
      }
      const text = Buffer.concat(out).toString().trim();
      resolve({
        lines: text == "" ? [] : text.split("\n"),
        vcfid: query.vcf.vcfid
      });
    });
  });
}
|
|
97
|
-
export {
|
|
98
|
-
init
|
|
99
|
-
};
|
package/routes/gdc.maf.js
DELETED
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
import ky from "ky";
|
|
2
|
-
import { joinUrl } from "#shared/joinUrl.js";
|
|
3
|
-
import serverconfig from "#src/serverconfig.js";
|
|
4
|
-
import { getGdcSampletypes } from "#src/mds3.gdc.js";
|
|
5
|
-
// Sent as the "size" value of the files request built by listMafFiles().
const maxFileNumber = 1e3;
|
|
6
|
-
// Value required for the "analysis.workflow_type" field in the file filter built by listMafFiles().
const allowedWorkflowType = "Aliquot Ensemble Somatic Variant Merging and Masking";
|
|
7
|
-
// Cap on total MAF file size, returned to the client by listMafFiles() and exported; taken from
// serverconfig.features.gdcMafMaxFileSize, else 4e8 (presumably bytes, as in GDC "file_size" — TODO confirm).
const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 4e8;
|
|
8
|
-
/**
 * Create the request handler that lists GDC MAF files.
 *
 * Requires the hg38 genome with a "GDC" dataset, then sends whatever
 * listMafFiles() returns for the request query. Failures are sent as
 * `{ status: "error", error }`.
 *
 * @param {{genomes: object}} arg - genome objects keyed by genome name
 * @returns {(req: object, res: object) => Promise<void>} async request handler
 */
function init({ genomes }) {
  return async function handleGdcMaf(req, res) {
    try {
      const { hg38 } = genomes;
      if (!hg38) throw "hg38 missing";
      const gdc = hg38.datasets.GDC;
      if (!gdc) throw "hg38 GDC missing";
      res.send(await listMafFiles(req.query, gdc));
    } catch (err) {
      res.send({ status: "error", error: err.message || err });
    }
  };
}
|
|
23
|
-
/**
 * Query the GDC "files" endpoint for open-access MAF files and summarize them.
 *
 * Files are filtered by data format "MAF", the requested experimental strategy
 * and `allowedWorkflowType`; `q.filter0`, when given, is sent as a case filter.
 * The listing is sorted by file size, largest first.
 *
 * @param {object} q - request query; reads experimentalStrategy and filter0,
 *   and is passed to ds.getHostHeaders()
 * @param {object} ds - GDC dataset; provides getHostHeaders()
 * @returns {Promise<{files: object[], filesTotal: number, maxTotalSizeCompressed: number}>}
 * @throws a string when the HTTP response is not ok or the payload has an unexpected shape
 */
async function listMafFiles(q, ds) {
  const equals = (field, value) => ({ op: "=", content: { field, value } });

  const filters = {
    op: "and",
    content: [
      equals("data_format", "MAF"),
      equals("experimental_strategy", q.experimentalStrategy),
      equals("analysis.workflow_type", allowedWorkflowType),
      // delete if later to support controlled files
      equals("access", "open")
    ]
  };
  const caseFilterList = q.filter0 ? [q.filter0] : [];

  const { host, headers } = ds.getHostHeaders(q);

  const requestedFields = [
    "id",
    "file_size",
    // for display only
    "cases.project.project_id",
    // case uuid for making case url link to portal
    "cases.case_id",
    // used when listing all cases & files
    "cases.submitter_id",
    "cases.samples.tissue_type",
    "cases.samples.tumor_descriptor"
  ];
  const body = { filters, size: maxFileNumber, fields: requestedFields.join(",") };
  if (caseFilterList.length) body.case_filters = { op: "and", content: caseFilterList };

  const response = await ky.post(joinUrl(host.rest, "files"), { headers, timeout: false, json: body });
  if (!response.ok) throw `HTTP Error: ${response.status} ${response.statusText}`;

  const payload = await response.json();
  if (!Number.isInteger(payload.data?.pagination?.total)) throw "re.data.pagination.total is not int";
  if (!Array.isArray(payload.data?.hits)) throw "re.data.hits[] not array";

  const files = payload.data.hits.map((hit) => {
    // only the first case of a file is reported
    const firstCase = hit.cases?.[0];
    if (!firstCase) throw "h.cases[0] missing";
    return {
      id: hit.id,
      project_id: firstCase.project.project_id,
      file_size: hit.file_size,
      case_submitter_id: firstCase.submitter_id,
      case_uuid: firstCase.case_id,
      sample_types: getGdcSampletypes(firstCase)
    };
  });
  // largest files first
  files.sort((a, b) => b.file_size - a.file_size);

  return {
    files,
    filesTotal: payload.data.pagination.total,
    maxTotalSizeCompressed
  };
}
|
|
82
|
-
export {
|
|
83
|
-
init,
|
|
84
|
-
maxTotalSizeCompressed
|
|
85
|
-
};
|
package/routes/gdc.mafBuild.js
DELETED
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
import ky from "ky";
|
|
2
|
-
import { joinUrl } from "#shared/joinUrl.js";
|
|
3
|
-
import { stream_rust } from "@sjcrh/proteinpaint-rust";
|
|
4
|
-
import { maxTotalSizeCompressed } from "./gdc.maf.ts";
|
|
5
|
-
import { mayLog } from "#src/helpers.ts";
|
|
6
|
-
/**
 * Create the request handler that builds and streams a combined GDC MAF file.
 *
 * Requires the hg38 genome with a "GDC" dataset and delegates to buildMaf(),
 * which writes the response itself. A failure before any response header was
 * sent is reported as `{ status: "error", error }`.
 *
 * @param {{genomes: object}} arg - genome objects keyed by genome name
 * @returns {(req: object, res: object) => Promise<void>} async request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    try {
      const g = genomes.hg38;
      if (!g) throw "hg38 missing";
      const ds = g.datasets.GDC;
      if (!ds) throw "hg38 GDC missing";
      const q = req.query;
      await buildMaf(q, res, ds);
    } catch (e) {
      if (e?.stack) console.log(e.stack);
      if (res.headersSent) {
        // buildMaf() already started the multipart response, so a JSON error
        // can no longer be sent; calling res.send() here would throw from
        // inside this catch block. Just make sure the response is closed.
        if (!res.writableEnded) res.end();
        return;
      }
      res.send({ status: "error", error: e?.message || e });
    }
  };
}
|
|
21
|
-
/**
 * Stream a combined, gzipped MAF file to the client as a multipart response.
 *
 * The requested files are first limited by getFileLstUnderSizeLimit(). The
 * response then consists of a "gzfile" part carrying the output of the
 * "gdcmaf" rust stream, optionally followed by an "errors" part, and is
 * closed by the final boundary written in emitJson().
 *
 * @param {object} q - request query; reads fileIdLst and columns, and is passed to ds.getHostHeaders()
 * @param {object} res - response object that is written to directly
 * @param {object} ds - GDC dataset; provides getHostHeaders()
 * @returns {Promise<void>}
 */
async function buildMaf(q, res, ds) {
  const startTime = Date.now();
  const boundary = "------------------------GDC-MAF-BUILD";

  /**
   * Write the optional "errors" part and the closing boundary, then end the
   * response unless `end` is false. Passed to stream_rust() as its callback.
   */
  const emitJson = (data, end = true) => {
    if (res.writableEnded) return;
    if (data) {
      res.write(`\r\n--${boundary}`);
      res.write('\r\nContent-Disposition: form-data; name="errors"');
      res.write("\r\nContent-Type: application/x-jsonlines");
      const json = typeof data === "string" ? data : JSON.stringify(data);
      res.write("\r\n\r\n" + json);
    }
    res.write(`\r\n--${boundary}--\r\n`);
    mayLog("rust gdcmaf", Date.now() - startTime);
    if (end) res.end();
  };

  /** Runs when the response closes: end it if still open and stop the rust stream. */
  const onResponseClosed = (endStream) => {
    if (res.writableEnded) return;
    try {
      console.log("\n-- forced res.end() ---\n");
      res.end();
    } catch (e) {
      console.log("error with forced res.end()", e);
    }
    try {
      endStream();
    } catch (e) {
      console.log("error in calling endStream()", e);
    }
  };

  const { host, headers } = ds.getHostHeaders(q);
  const acceptedIds = await getFileLstUnderSizeLimit(q.fileIdLst, host, headers);
  mayLog(`${acceptedIds.length} out of ${q.fileIdLst.length} input MAF files accepted by size limit`, Date.now() - startTime);

  const rustInput = {
    fileIdLst: acceptedIds,
    columns: q.columns,
    // must use the /data/ endpoint from current host
    host: joinUrl(host.rest, "data"),
    headers
  };

  // open the first part, which carries the gzipped MAF content
  res.setHeader("Content-Type", `multipart/form-data; boundary=${boundary}`);
  res.write(`--${boundary}`);
  res.write('\r\nContent-Disposition: form-data; name="gzfile"; filename="cohort.maf.gz"');
  res.write("\r\nContent-Type: application/gzip\r\n\r\n");
  res.flush();

  try {
    const streams = stream_rust("gdcmaf", JSON.stringify(rustInput), emitJson);
    if (!streams) {
      emitJson({ error: "server error: undefined rustStream" });
      return;
    }
    const { rustStream, endStream } = streams;
    res.on("close", () => onResponseClosed(endStream));

    // { end: false } keeps the response open after the data, so that emitJson() can append to it.
    // NOTE(review): if rustStream is a standard Node Readable, pipe() returns its
    // destination (res), so the two handlers below are registered on res rather
    // than on rustStream — confirm this is intended.
    const piped = rustStream.pipe(res, { end: false });
    piped
      .on("error", (e) => {
        console.log("rustStream.pipe().on(error)", e);
      })
      .on("end", () => {
        if (res.writableEnded) return;
        console.log("rustStream.on(end), trigger res.end()");
        res.end();
      });
  } catch (e) {
    console.log("error calling stream_rust(gdcmaf)", e);
  }
}
|
|
87
|
-
/**
 * Look up the sizes of the given GDC files and keep ids, in the order returned
 * by the API, until the cumulative size reaches `maxTotalSizeCompressed`.
 *
 * The limit is checked before each file is added, so the accepted total can
 * exceed the limit by the size of the last accepted file.
 *
 * @param {string[]} lst - GDC file ids
 * @param {object} host - host settings; host.rest is the API base URL
 * @param {object} headers - HTTP headers for the API request
 * @returns {Promise<string[]>} ids of the accepted files (never empty)
 * @throws a string when `lst` is not a non-empty array, the request fails, the
 *   payload has an unexpected shape, or no file is available
 */
async function getFileLstUnderSizeLimit(lst, host, headers) {
  // reject non-arrays explicitly: a missing value would otherwise surface as a
  // raw TypeError, and a string would be forwarded to the API as the id list
  if (!Array.isArray(lst) || lst.length == 0) throw "fileIdLst[] not array or blank";

  const body = {
    filters: {
      op: "in",
      content: { field: "file_id", value: lst }
    },
    size: 1e4,
    fields: "file_size"
  };
  const response = await ky.post(joinUrl(host.rest, "files"), { headers, timeout: false, json: body });
  if (!response.ok) throw `HTTP Error: ${response.status} ${response.statusText}`;
  const re = await response.json();
  if (!Array.isArray(re.data?.hits)) throw "re.data.hits[] not array";

  const out = [];
  let cumsize = 0;
  for (const h of re.data.hits) {
    if (cumsize >= maxTotalSizeCompressed) break;
    if (!h.id) throw ".id missing";
    if (!Number.isInteger(h.file_size)) throw ".file_size not integer";
    cumsize += h.file_size;
    out.push(h.id);
  }
  if (out.length == 0) throw "no file available";
  return out;
}
|
|
113
|
-
export {
|
|
114
|
-
init
|
|
115
|
-
};
|