@sjcrh/proteinpaint-server 2.124.1 → 2.126.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -12
- package/dataset/termdb.test.js +2 -1
- package/package.json +8 -13
- package/routes/gdcGRIN2list.js +121 -0
- package/routes/gdcGRIN2run.js +96 -0
- package/routes/genesetEnrichment.js +15 -13
- package/routes/termdb.cluster.js +1 -1
- package/routes/termdb.config.js +1 -0
- package/src/app.js +1105 -858
- package/src/serverconfig.js +9 -0
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#
|
|
1
|
+
# ProteinPaint Server
|
|
2
2
|
|
|
3
3
|
The data server backend for the ProteinPaint application
|
|
4
4
|
|
|
@@ -6,26 +6,38 @@ The data server backend for the ProteinPaint application
|
|
|
6
6
|
|
|
7
7
|
The client dependencies should be installed as a workspace, follow the README at the project root.
|
|
8
8
|
|
|
9
|
-
You can either
|
|
10
|
-
|
|
11
|
-
- or install the [system depedencies](https://docs.google.com/document/d/1tkEHG_vYtT-OifPV-tlPeWQUMsEd3aWAKf5ExOT8G34/edit#heading=h.jy5sdrb1zkut)
|
|
12
|
-
as listed in the [installation instructions](https://docs.google.com/document/d/1tkEHG_vYtT-OifPV-tlPeWQUMsEd3aWAKf5ExOT8G34/edit#heading=h.6nxua6c3ik9l).
|
|
9
|
+
You can either use a docker container for development (../build/dev/README.md), or build from source.
|
|
10
|
+
See https://github.com/stjude/proteinpaint/wiki.
|
|
13
11
|
|
|
14
12
|
|
|
15
|
-
##
|
|
13
|
+
## Serverconfig
|
|
14
|
+
|
|
15
|
+
`server/serverconfig.json` is used:
|
|
16
|
+
- when running any server or test scripts from the pp/server directory
|
|
17
|
+
- if `${process.cwd()}/serverconfig.json` does not exist wherever `@sjcrh/proteinpaint-server` is called (TODO: give an example of this use case)
|
|
16
18
|
|
|
17
|
-
|
|
19
|
+
If no `test:unit` code used serverconfig, it would be fine to omit `server/serverconfig.json`.
|
|
20
|
+
However, it's safer to simply have that file available just in case any imported server code uses serverconfig
|
|
21
|
+
and runs as part of test:unit, and that's why `server/emitImports.js` creates one if it doesn't exist by
|
|
22
|
+
copying `container/ci/serverconfig.json`.
|
|
18
23
|
|
|
19
|
-
`npm run dev` rebundles backend code
|
|
20
24
|
|
|
21
|
-
|
|
25
|
+
## Develop
|
|
22
26
|
|
|
27
|
+
The local development environment is usually triggered following one of these:
|
|
28
|
+
- follow the `Develop` section in [proteinpaint/README.md](https://github.com/stjude/proteinpaint/blob/master/README.md) (preferred)
|
|
29
|
+
- `npm start` from the `server` dir
|
|
23
30
|
|
|
24
31
|
## Test
|
|
25
32
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
33
|
+
To run both type checks and test:unit: `npm run test`.
|
|
34
|
+
|
|
35
|
+
To run type check only: `npx tsc`.
|
|
36
|
+
|
|
37
|
+
To run unit tests only: `npm run test:unit`.
|
|
38
|
+
|
|
39
|
+
To run a specific test file: `npx tsx path/to/spec.ts`.
|
|
40
|
+
|
|
29
41
|
|
|
30
42
|
## Build
|
|
31
43
|
|
package/dataset/termdb.test.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.124.1",
|
|
3
|
+
"version": "2.126.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -16,8 +16,9 @@
|
|
|
16
16
|
"scripts": {
|
|
17
17
|
"dev": "npm run start",
|
|
18
18
|
"prestart": "tsx emitImports.js dev > server.js",
|
|
19
|
-
"start": "tsx watch
|
|
19
|
+
"start": "tsx watch ./start.js",
|
|
20
20
|
"test:unit": "tsx emitImports.js unit > serverTests.js && c8 tsx serverTests.js && rm -rf ./cache",
|
|
21
|
+
"test": "tsc && npm run test:unit",
|
|
21
22
|
"precombined:coverage": "tsx emitImports.js unit > serverTests.js",
|
|
22
23
|
"combined:coverage": "coverageKey=test c8 --all --src=proteinpaint/server --experimental-monocart -r=v8 -r=html -r=json -r=markdown-summary -r=markdown-details -o=./.coverage tsx ./coverage.js & ",
|
|
23
24
|
"postcombined:coverage": "rm -rf ./cache",
|
|
@@ -29,13 +30,7 @@
|
|
|
29
30
|
"build": "./build.sh",
|
|
30
31
|
"prepack": "npm run build",
|
|
31
32
|
"postpack": "./dedupjs.sh",
|
|
32
|
-
"dedup": "./dedupjs.sh"
|
|
33
|
-
"//todo": "refactor or deprecate the scripts below",
|
|
34
|
-
"pretest": "tsc && ./test/pretest.js",
|
|
35
|
-
"pretest:type": "npm run checkers",
|
|
36
|
-
"pretest:integration": "tsc",
|
|
37
|
-
"test:integration": "echo 'TODO: server integration tests'",
|
|
38
|
-
"test:tsc": "tsc --esModuleInterop --noEmit --allowImportingTsExtensions ./shared/types/test/*.type.spec.ts"
|
|
33
|
+
"dedup": "./dedupjs.sh"
|
|
39
34
|
},
|
|
40
35
|
"author": "",
|
|
41
36
|
"license": "SEE LICENSE IN ./LICENSE",
|
|
@@ -67,10 +62,10 @@
|
|
|
67
62
|
"dependencies": {
|
|
68
63
|
"@sjcrh/augen": "2.121.0",
|
|
69
64
|
"@sjcrh/proteinpaint-python": "2.118.0",
|
|
70
|
-
"@sjcrh/proteinpaint-r": "2.
|
|
71
|
-
"@sjcrh/proteinpaint-rust": "2.
|
|
72
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
73
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
65
|
+
"@sjcrh/proteinpaint-r": "2.126.0",
|
|
66
|
+
"@sjcrh/proteinpaint-rust": "2.126.0",
|
|
67
|
+
"@sjcrh/proteinpaint-shared": "2.126.0",
|
|
68
|
+
"@sjcrh/proteinpaint-types": "2.126.0",
|
|
74
69
|
"@types/express": "^5.0.0",
|
|
75
70
|
"@types/express-session": "^1.18.1",
|
|
76
71
|
"better-sqlite3": "^9.4.1",
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { gdcMafPayload } from "#types/checkers";
|
|
2
|
+
import ky from "ky";
|
|
3
|
+
import { joinUrl } from "#shared/joinUrl.js";
|
|
4
|
+
import serverconfig from "#src/serverconfig.js";
|
|
5
|
+
// Number of file records requested from the GDC files API in a single query
// (sent as the `size` field of the request body).
const maxFileNumber = 1000;

// Only MAF files produced by this GDC analysis workflow are listed.
const allowedWorkflowType = "Aliquot Ensemble Somatic Variant Merging and Masking";

// Size limit returned to the client together with the file list; taken from
// serverconfig.features.gdcMafMaxFileSize when that is set to a truthy value.
// NOTE(review): presumably a byte count of compressed MAF data - confirm.
const maxTotalSizeCompressed = serverconfig.features.gdcMafMaxFileSize || 400000000;

// Route definition: GET and POST are registered with the same payload
// definition and the same handler factory, each as its own object.
const api = {
  endpoint: "gdc/GRIN2list",
  methods: Object.fromEntries(["get", "post"].map((method) => [method, { ...gdcMafPayload, init }]))
};
|
|
21
|
+
/**
 * Create the request handler for the "gdc/GRIN2list" route.
 *
 * The handler looks up the GDC dataset of the hg38 genome, passes the request
 * query to listMafFiles() and sends the resulting payload. Any failure is
 * reported to the client as `{ status: "error", error: <message> }`.
 *
 * @param {object} arg
 * @param {object} arg.genomes - genome objects keyed by genome name; `hg38` is required
 * @returns {(req: object, res: object) => Promise<void>} async request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    try {
      const g = genomes.hg38;
      if (!g) throw new Error("hg38 missing");
      const ds = g.datasets.GDC;
      if (!ds) throw new Error("hg38 GDC missing");
      res.send(await listMafFiles(req.query, ds));
    } catch (e) {
      // Keep a server-side trace; the client only receives the message text.
      if (e?.stack) console.error(e.stack);
      // Thrown values may be Error objects or plain strings.
      res.send({ status: "error", error: e?.message || String(e) });
    }
  };
}
|
|
37
|
+
/**
 * Build the de-duplicated list of sample type labels for one case.
 * A label is "<tumor_descriptor> <tissue_type>"; parts that are missing are
 * left out instead of being rendered as the text "undefined".
 *
 * @param {Array<{tumor_descriptor?: string, tissue_type?: string}>|undefined} samples
 * @returns {string[]} labels, with the normal sample label (if any) last
 */
function getSampleTypeLabels(samples) {
  if (!samples) return [];
  const joinParts = (...parts) => parts.filter((p) => p != null && p !== "").join(" ");
  const labels = [];
  let normalLabel;
  for (const { tumor_descriptor, tissue_type } of samples) {
    if (tissue_type === "Normal") {
      // "Not Applicable" carries no information for a normal sample, so it is dropped.
      normalLabel = joinParts(tumor_descriptor === "Not Applicable" ? null : tumor_descriptor, tissue_type);
      continue;
    }
    const label = joinParts(tumor_descriptor, tissue_type);
    if (label) labels.push(label);
  }
  // The normal label is appended after all other labels.
  if (normalLabel) labels.push(normalLabel);
  return [...new Set(labels)];
}

/**
 * Query the GDC "files" endpoint for open-access MAF files of the requested
 * experimental strategy and return them sorted by file size, largest first.
 *
 * @param {object} q - request query; reads `experimentalStrategy` and the optional
 *   `filter0` (added to the request as a case filter)
 * @param {object} ds - GDC dataset object; must provide `getHostHeaders(q)`
 * @returns {Promise<{files: object[], filesTotal: number, maxTotalSizeCompressed: number}>}
 * @throws {Error} when the HTTP response is not ok or the response JSON lacks the
 *   expected `data.pagination.total`, `data.hits[]`, `cases[0]` or `cases[0].project`
 */
async function listMafFiles(q, ds) {
  const filters = {
    op: "and",
    content: [
      { op: "and", content: [{ op: "=", content: { field: "data_format", value: "MAF" } }] },
      { op: "=", content: { field: "experimental_strategy", value: q.experimentalStrategy } },
      { op: "=", content: { field: "analysis.workflow_type", value: allowedWorkflowType } },
      { op: "=", content: { field: "access", value: "open" } }
    ]
  };
  const { host } = ds.getHostHeaders(q);
  const body = {
    filters,
    size: maxFileNumber,
    fields: [
      "id",
      "file_size",
      "cases.project.project_id", // for display only
      "cases.case_id", // case uuid for making case url link to portal
      "cases.submitter_id", // used when listing all cases & files
      "cases.samples.tissue_type",
      "cases.samples.tumor_descriptor"
    ].join(",")
  };
  // case_filters is only sent when the request carries a cohort filter.
  if (q.filter0) body.case_filters = { op: "and", content: [q.filter0] };

  const response = await ky.post(joinUrl(host.rest, "files"), { timeout: false, json: body });
  // NOTE(review): ky rejects on non-2xx responses by default, so this is presumably
  // only a safeguard for configurations where that is disabled - confirm.
  if (!response.ok) throw new Error(`HTTP Error: ${response.status} ${response.statusText}`);

  const re = await response.json();
  if (!Number.isInteger(re.data?.pagination?.total)) throw new Error("re.data.pagination.total is not int");
  if (!Array.isArray(re.data?.hits)) throw new Error("re.data.hits[] not array");

  const files = [];
  for (const h of re.data.hits) {
    // Only the first case of a file is used.
    const c = h.cases?.[0];
    if (!c) throw new Error("h.cases[0] missing");
    if (!c.project) throw new Error("h.cases[0].project missing");
    files.push({
      id: h.id,
      project_id: c.project.project_id,
      file_size: h.file_size,
      case_submitter_id: c.submitter_id,
      case_uuid: c.case_id,
      sample_types: getSampleTypeLabels(c.samples)
    });
  }
  files.sort((a, b) => b.file_size - a.file_size);

  return {
    files,
    filesTotal: re.data.pagination.total,
    maxTotalSizeCompressed
  };
}
|
|
118
|
+
export {
|
|
119
|
+
api,
|
|
120
|
+
maxTotalSizeCompressed
|
|
121
|
+
};
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { runGRIN2Payload } from "#types/checkers";
|
|
2
|
+
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
3
|
+
import { run_R } from "@sjcrh/proteinpaint-r";
|
|
4
|
+
import serverconfig from "#src/serverconfig.js";
|
|
5
|
+
import path from "path";
|
|
6
|
+
// Route definition for "gdc/runGRIN2": GET and POST are registered with the
// same payload definition and the same handler factory, each as its own object.
const api = {
  endpoint: "gdc/runGRIN2",
  methods: Object.fromEntries(["get", "post"].map((method) => [method, { ...runGRIN2Payload, init }]))
};
|
|
19
|
+
/**
 * Create the request handler for the "gdc/runGRIN2" route.
 *
 * The handler:
 *  1. checks that the hg38 genome and its GDC dataset are configured,
 *  2. passes the request query (JSON-encoded) to the "gdcGRIN2" Rust program,
 *  3. passes the Rust output as `lesion`, together with the gene db path, the
 *     genome's `majorchr` and a unique image file name in the cache dir, to
 *     the "gdcGRIN2.R" script,
 *  4. responds with `{ pngImg }` taken from the first entry of the R result's `png`.
 *
 * Every failure, including an R result that cannot be parsed or that has no
 * image, is answered with HTTP 500 and `{ status: "error", error: <message> }`,
 * so the request never stays open without a response.
 *
 * @param {object} arg
 * @param {object} arg.genomes - genome objects keyed by genome name; `hg38` is required
 * @returns {(req: object, res: object) => Promise<void>} async request handler
 */
function init({ genomes }) {
  return async (req, res) => {
    try {
      console.log("[GRIN2] Validating genome configuration");
      const g = genomes.hg38;
      if (!g) throw new Error("hg38 missing");
      const ds = g.datasets.GDC;
      if (!ds) throw new Error("hg38 GDC missing");

      const caseFiles = req.query;
      console.log(`[GRIN2] Request received: ${JSON.stringify(caseFiles)}`);
      if (!caseFiles) throw new Error("Missing or invalid cases data");

      console.log("[GRIN2] Calling Rust for file processing...");
      const rustResult = await run_rust("gdcGRIN2", JSON.stringify(caseFiles));
      console.log(`[GRIN2] Rust execution completed, result type: ${typeof rustResult}`);
      if (!rustResult) throw new Error("Failed to process MAF files: No result from Rust");

      const rInput = JSON.stringify({
        genedb: path.join(serverconfig.tpmasterdir, g.genedb.dbfile),
        chromosomelist: g.majorchr,
        // Timestamp plus random suffix keeps concurrent requests from sharing a file name.
        imagefile: path.join(serverconfig.cachedir, `grin2_${Date.now()}_${Math.floor(Math.random() * 1e9)}.png`),
        // The Rust output is forwarded to R unmodified.
        lesion: rustResult
      });

      console.log("[GRIN2] Executing R script...");
      const rResult = await run_R("gdcGRIN2.R", rInput, []);
      console.log("[GRIN2] R execution completed");

      let resultData;
      try {
        resultData = JSON.parse(rResult);
      } catch (parseError) {
        // Log only the head of the raw output; it may be very large.
        console.error("[GRIN2] Raw R result (truncated):", String(rResult).slice(0, 200));
        throw new Error(`Failed to parse R result: ${parseError.message}`);
      }
      const pngImg = resultData?.png?.[0];
      if (!pngImg) throw new Error("R result does not contain a png image");

      console.log("[GRIN2] Finished R analysis");
      return res.json({ pngImg });
    } catch (e) {
      console.error("[GRIN2] Error running analysis:", e);
      const errorResponse = {
        status: "error",
        error: e?.message || String(e)
      };
      console.log(`[GRIN2] Sending error response: ${JSON.stringify(errorResponse)}`);
      res.status(500).send(errorResponse);
    }
  };
}
|
|
94
|
+
export {
|
|
95
|
+
api
|
|
96
|
+
};
|
|
@@ -3,8 +3,8 @@ import fs from "fs";
|
|
|
3
3
|
import path from "path";
|
|
4
4
|
import serverconfig from "#src/serverconfig.js";
|
|
5
5
|
import { run_python } from "@sjcrh/proteinpaint-python";
|
|
6
|
+
import { run_rust } from "@sjcrh/proteinpaint-rust";
|
|
6
7
|
import { mayLog } from "#src/helpers.ts";
|
|
7
|
-
import { DeleteCacheFiles } from "#src/DeleteCacheFiles.ts";
|
|
8
8
|
const api = {
|
|
9
9
|
endpoint: "genesetEnrichment",
|
|
10
10
|
methods: {
|
|
@@ -18,6 +18,7 @@ const api = {
|
|
|
18
18
|
}
|
|
19
19
|
}
|
|
20
20
|
};
|
|
21
|
+
const cachedir_gsea = path.join(serverconfig.cachedir, "gsea");
|
|
21
22
|
function init({ genomes }) {
|
|
22
23
|
return async (req, res) => {
|
|
23
24
|
try {
|
|
@@ -48,26 +49,27 @@ function init({ genomes }) {
|
|
|
48
49
|
async function run_genesetEnrichment_analysis(q, genomes) {
|
|
49
50
|
if (!genomes[q.genome].termdbs)
|
|
50
51
|
throw "termdb database is not available for " + q.genome;
|
|
51
|
-
const cache = new DeleteCacheFiles({
|
|
52
|
-
cachedir: serverconfig.cachedir_gsea,
|
|
53
|
-
fileExtensions: [".pkl"],
|
|
54
|
-
maxSize: 1e6
|
|
55
|
-
});
|
|
56
|
-
await cache.mayResetCacheCheckTimeout(cache.checkWait);
|
|
57
52
|
const genesetenrichment_input = {
|
|
58
53
|
genes: q.genes,
|
|
59
54
|
fold_change: q.fold_change,
|
|
60
55
|
db: genomes[q.genome].termdbs.msigdb.cohort.db.connection.name,
|
|
61
56
|
// For now msigdb has been added, but later databases other than msigdb may be used
|
|
62
57
|
geneset_group: q.geneSetGroup,
|
|
63
|
-
cachedir: serverconfig.cachedir_gsea,
|
|
64
|
-
geneset_name: q.geneset_name,
|
|
65
|
-
pickle_file: q.pickle_file,
|
|
66
|
-
num_permutations: q.num_permutations,
|
|
67
58
|
genedb: path.join(serverconfig.tpmasterdir, genomes[q.genome].genedb.dbfile),
|
|
68
59
|
filter_non_coding_genes: q.filter_non_coding_genes
|
|
69
60
|
};
|
|
70
|
-
|
|
61
|
+
let gsea_output;
|
|
62
|
+
if (q.method == "blitzgsea") {
|
|
63
|
+
genesetenrichment_input.cachedir = cachedir_gsea;
|
|
64
|
+
genesetenrichment_input.pickle_file = q.pickle_file;
|
|
65
|
+
genesetenrichment_input.geneset_name = q.geneset_name;
|
|
66
|
+
genesetenrichment_input.num_permutations = q.num_permutations;
|
|
67
|
+
gsea_output = await run_python("gsea.py", "/" + JSON.stringify(genesetenrichment_input));
|
|
68
|
+
} else if (q.method == "cerno") {
|
|
69
|
+
gsea_output = await run_rust("cerno", JSON.stringify(genesetenrichment_input));
|
|
70
|
+
} else {
|
|
71
|
+
throw "Unknown method:" + q.method;
|
|
72
|
+
}
|
|
71
73
|
let result;
|
|
72
74
|
let data_found = false;
|
|
73
75
|
let image_found = false;
|
|
@@ -85,7 +87,7 @@ async function run_genesetEnrichment_analysis(q, genomes) {
|
|
|
85
87
|
if (data_found)
|
|
86
88
|
return result;
|
|
87
89
|
if (image_found)
|
|
88
|
-
return path.join(
|
|
90
|
+
return path.join(cachedir_gsea, result.image_file);
|
|
89
91
|
throw "data or image not found in gsea output; this should not happen";
|
|
90
92
|
}
|
|
91
93
|
export {
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -36,7 +36,7 @@ function init({ genomes }) {
|
|
|
36
36
|
const ds = g.datasets[q.dslabel];
|
|
37
37
|
if (!ds)
|
|
38
38
|
throw "invalid dataset name";
|
|
39
|
-
if (
|
|
39
|
+
if (!ds.__gdc?.doneCaching)
|
|
40
40
|
throw "The server has not finished caching the case IDs: try again in about 2 minutes.";
|
|
41
41
|
if ([TermTypes.GENE_EXPRESSION, TermTypes.METABOLITE_INTENSITY, NUMERIC_DICTIONARY_TERM].includes(q.dataType)) {
|
|
42
42
|
if (!ds.queries?.[q.dataType] && q.dataType !== NUMERIC_DICTIONARY_TERM)
|