@sjcrh/proteinpaint-server 2.34.1 → 2.35.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/routes/termdb.singleSampleMutation.ts +87 -0
- package/routes/termdb.singlecellData.ts +8 -4
- package/routes/termdb.singlecellSamples.ts +28 -21
- package/routes/termdb.topVariablyExpressedGenes.ts +161 -0
- package/server.js +2 -1
- package/server.js.map +1 -0
- package/utils/edge.R +14 -14
- package/utils/fastclust.R +28 -21
- package/routes/gdc.topVariablyExpressedGenes.ts +0 -124
package/utils/edge.R
CHANGED
|
@@ -4,20 +4,20 @@
|
|
|
4
4
|
|
|
5
5
|
# Checking if all R packages are installed or not, if not installing each one of them
|
|
6
6
|
|
|
7
|
-
jsonlite_path <- system.file(package='jsonlite')
|
|
8
|
-
if (nchar(jsonlite_path) == 0) {
|
|
9
|
-
install.packages("jsonlite", repos='https://cran.case.edu/')
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
edgeR_path <- system.file(package='edgeR')
|
|
13
|
-
if (nchar(edgeR_path) == 0) {
|
|
14
|
-
BiocManager::install("edgeR")
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
readr_path <- system.file(package='readr')
|
|
18
|
-
if (nchar(readr_path) == 0) {
|
|
19
|
-
install.packages("readr", repos='https://cran.case.edu/')
|
|
20
|
-
}
|
|
7
|
+
#jsonlite_path <- system.file(package='jsonlite')
|
|
8
|
+
#if (nchar(jsonlite_path) == 0) {
|
|
9
|
+
# install.packages("jsonlite", repos='https://cran.case.edu/')
|
|
10
|
+
#}
|
|
11
|
+
#
|
|
12
|
+
#edgeR_path <- system.file(package='edgeR')
|
|
13
|
+
#if (nchar(edgeR_path) == 0) {
|
|
14
|
+
# BiocManager::install("edgeR")
|
|
15
|
+
#}
|
|
16
|
+
#
|
|
17
|
+
#readr_path <- system.file(package='readr')
|
|
18
|
+
#if (nchar(readr_path) == 0) {
|
|
19
|
+
# install.packages("readr", repos='https://cran.case.edu/')
|
|
20
|
+
#}
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
library(jsonlite)
|
package/utils/fastclust.R
CHANGED
|
@@ -3,27 +3,34 @@
|
|
|
3
3
|
|
|
4
4
|
# Image is in Rplots.pdf
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
6
|
+
############################
|
|
7
|
+
# !!! NOTE !!! #
|
|
8
|
+
############################
|
|
9
|
+
# must not auto-install missing package in any R script!
|
|
10
|
+
# declare required packages in dockerfile
|
|
11
|
+
# in 2023/12, a problem emerged for the pp container running in gdc qa-pink
|
|
12
|
+
# since the docker image lacks the packages, and the auto-install was blocked by container safety rules (no internet access),
|
|
13
|
+
# this script will not run, leading to hard-to-decipher crashes
|
|
14
|
+
|
|
15
|
+
#ggplot2_path <- system.file(package='ggplot2')
|
|
16
|
+
#if (nchar(ggplot2_path) == 0) {
|
|
17
|
+
# install.packages("ggplot2", repos='https://cran.case.edu/')
|
|
18
|
+
#}
|
|
19
|
+
#
|
|
20
|
+
#jsonlite_path <- system.file(package='jsonlite')
|
|
21
|
+
#if (nchar(jsonlite_path) == 0) {
|
|
22
|
+
# install.packages("jsonlite", repos='https://cran.case.edu/')
|
|
23
|
+
#}
|
|
24
|
+
#
|
|
25
|
+
#dendextend_path <- system.file(package='dendextend')
|
|
26
|
+
#if (nchar(dendextend_path) == 0) {
|
|
27
|
+
# install.packages("dendextend", repos='https://cran.case.edu/')
|
|
28
|
+
#}
|
|
29
|
+
#
|
|
30
|
+
#reshape_path <- system.file(package='reshape')
|
|
31
|
+
#if (nchar(reshape_path) == 0) {
|
|
32
|
+
# install.packages("reshape", repos='https://cran.case.edu/')
|
|
33
|
+
#}
|
|
27
34
|
|
|
28
35
|
#flashClust_path <- system.file(package='flashClust')
|
|
29
36
|
#if (nchar(flashClust_path) == 0) {
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
GdcTopVariablyExpressedGenesRequest,
|
|
3
|
-
GdcTopVariablyExpressedGenesResponse
|
|
4
|
-
} from '#shared/types/routes/gdc.topVariablyExpressedGenes.ts'
|
|
5
|
-
import { getCasesWithExressionDataFromCohort } from '../src/mds3.gdc.js'
|
|
6
|
-
import path from 'path'
|
|
7
|
-
import got from 'got'
|
|
8
|
-
import serverconfig from '#src/serverconfig.js'
|
|
9
|
-
|
|
10
|
-
// TODO make it general purpose based on ds.queries.geneExpression.topVariablyExpressedGenes{}; wait till case/gene link changes are done
|
|
11
|
-
// https://github.com/NCI-GDC/gdcapi/blob/develop/openapi/gene-expression.yaml
|
|
12
|
-
const apihost = process.env.PP_GDC_HOST || 'https://api.gdc.cancer.gov'
|
|
13
|
-
// may override the geneExpHost for developers without access to qa/portal environments
|
|
14
|
-
const geneExpHost = serverconfig.features?.geneExpHost || apihost
|
|
15
|
-
|
|
16
|
-
const gdcGenome = 'hg38'
|
|
17
|
-
const gdcDslabel = 'GDC'
|
|
18
|
-
|
|
19
|
-
export const api = {
|
|
20
|
-
endpoint: 'gdc/topVariablyExpressedGenes',
|
|
21
|
-
methods: {
|
|
22
|
-
get: {
|
|
23
|
-
init,
|
|
24
|
-
request: {
|
|
25
|
-
typeId: 'GdcTopVariablyExpressedGenesRequest'
|
|
26
|
-
},
|
|
27
|
-
response: {
|
|
28
|
-
typeId: 'GdcTopVariablyExpressedGenesResponse'
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
function init({ genomes }) {
|
|
35
|
-
return async (req: any, res: any): Promise<void> => {
|
|
36
|
-
try {
|
|
37
|
-
// following logic requires hg38 gdc dataset
|
|
38
|
-
const genome = genomes[gdcGenome]
|
|
39
|
-
if (!genome) throw 'hg38 genome missing'
|
|
40
|
-
const ds = genome.datasets?.[gdcDslabel]
|
|
41
|
-
if (!ds) throw 'gdc dataset missing'
|
|
42
|
-
const genes = await getGenes(req.query as GdcTopVariablyExpressedGenesRequest, ds, genome)
|
|
43
|
-
const payload = { genes } as GdcTopVariablyExpressedGenesResponse
|
|
44
|
-
res.send(payload)
|
|
45
|
-
} catch (e: any) {
|
|
46
|
-
res.send({ status: 'error', error: e.message || e })
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
/*
|
|
52
|
-
*/
|
|
53
|
-
async function getGenes(q: GdcTopVariablyExpressedGenesRequest, ds: any, genome: any) {
|
|
54
|
-
if (serverconfig.features.gdcGenes) {
|
|
55
|
-
// for testing only; delete when api issue is resolved
|
|
56
|
-
return serverconfig.features.gdcGenes as string[]
|
|
57
|
-
}
|
|
58
|
-
if (!ds.__gdc.doneCaching) {
|
|
59
|
-
throw `The server has not finished caching the case IDs: try again in ~2 minutes`
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
// based on current cohort, get list of cases with exp data, as input of next api query
|
|
63
|
-
const caseLst = await getCasesWithExressionDataFromCohort(q, ds)
|
|
64
|
-
if (caseLst.length == 0) {
|
|
65
|
-
// there are no cases with gene exp data
|
|
66
|
-
return [] as string[]
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
// change to this when api is available on prod
|
|
70
|
-
const url = path.join(geneExpHost, '/gene_expression/gene_selection')
|
|
71
|
-
|
|
72
|
-
try {
|
|
73
|
-
const response = await got.post(url, {
|
|
74
|
-
headers: { 'Content-Type': 'application/json', Accept: 'application/json' },
|
|
75
|
-
body: JSON.stringify({
|
|
76
|
-
// !!! temporarily limit the case_ids length, otherwise the request times out !!!
|
|
77
|
-
case_ids: caseLst.slice(0, 20),
|
|
78
|
-
|
|
79
|
-
// temporary!! restrict pool to cgc due to slow api. delete when new api is online
|
|
80
|
-
gene_ids: tempGetCGCgenes(genome),
|
|
81
|
-
|
|
82
|
-
// when gene_ids is deleted, enable this
|
|
83
|
-
//gene_type: 'protein_coding',
|
|
84
|
-
|
|
85
|
-
selection_size: Number(q.maxGenes) // FIXME it's defined as number but why it's string??
|
|
86
|
-
})
|
|
87
|
-
})
|
|
88
|
-
|
|
89
|
-
const re = JSON.parse(response.body)
|
|
90
|
-
// {"gene_selection":[{"gene_id":"ENSG00000141510","log2_uqfpkm_median":3.103430497010492,"log2_uqfpkm_stddev":0.8692021350485105,"symbol":"TP53"}, ... ]}
|
|
91
|
-
|
|
92
|
-
const genes = [] as string[]
|
|
93
|
-
if (!Array.isArray(re.gene_selection)) throw 're.gene_selection[] is not array'
|
|
94
|
-
for (const i of re.gene_selection) {
|
|
95
|
-
if (i.gene_id && typeof i.gene_id == 'string') {
|
|
96
|
-
// is ensg, convert to symbol
|
|
97
|
-
const t = genome.genedb.getNameByAlias.get(i.gene_id)
|
|
98
|
-
if (t) genes.push(t.name) // ensg
|
|
99
|
-
} else if (i.symbol && typeof i.symbol == 'string') {
|
|
100
|
-
genes.push(i.symbol)
|
|
101
|
-
} else {
|
|
102
|
-
throw 'one of re.gene_selection[] is missing both gene_id and symbol'
|
|
103
|
-
}
|
|
104
|
-
}
|
|
105
|
-
return genes
|
|
106
|
-
} catch (e: any) {
|
|
107
|
-
console.log(e.stack || e)
|
|
108
|
-
throw e
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
function tempGetCGCgenes(genome: any) {
|
|
113
|
-
const lst = [] as string[] // list of ENSG ids from cgc genes
|
|
114
|
-
// don't think there's need to preparse genome.geneset, as this function is only temporary
|
|
115
|
-
for (const s of genome.geneset[0].lst) {
|
|
116
|
-
const a = genome.genedb.getAliasByName.all(s)
|
|
117
|
-
if (a) {
|
|
118
|
-
for (const b of a) {
|
|
119
|
-
if (b.alias.startsWith('ENSG')) lst.push(b.alias)
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
}
|
|
123
|
-
return lst
|
|
124
|
-
}
|