@sjcrh/proteinpaint-shared 2.180.0 → 2.180.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/bulk.cnv.js +30 -30
- package/src/bulk.del.js +48 -48
- package/src/bulk.itd.js +48 -48
- package/src/bulk.js +31 -31
- package/src/bulk.snv.js +109 -72
- package/src/bulk.sv.js +78 -78
- package/src/bulk.svjson.js +33 -31
- package/src/bulk.trunc.js +53 -47
- package/src/clustering.js +27 -27
- package/src/common.js +665 -558
- package/src/compute.percentile.js +3 -1
- package/src/fetch-helpers.js +67 -42
- package/src/fileSize.js +4 -4
- package/src/filter.js +207 -179
- package/src/hash.js +8 -5
- package/src/helpers.js +17 -9
- package/src/index.js +24 -24
- package/src/mds3tk.js +14 -12
- package/src/roundValue.js +5 -4
- package/src/termdb.bins.js +151 -84
- package/src/termdb.initbinconfig.js +46 -18
- package/src/termdb.usecase.js +125 -116
- package/src/terms.js +281 -266
- package/src/tree.js +4 -4
- package/src/vcf.ann.js +9 -9
- package/src/vcf.csq.js +8 -8
- package/src/vcf.info.js +3 -3
- package/src/vcf.js +99 -74
- package/src/vcf.type.js +8 -2
package/src/bulk.svjson.js
CHANGED
|
@@ -4,15 +4,15 @@
|
|
|
4
4
|
//
|
|
5
5
|
/////////////////////////////////
|
|
6
6
|
|
|
7
|
-
import * as common from
|
|
8
|
-
import * as bulk from
|
|
7
|
+
import * as common from "./common.js"
|
|
8
|
+
import * as bulk from "./bulk.js"
|
|
9
9
|
|
|
10
10
|
// work for both sv/fusion
|
|
11
11
|
// must tell if the data is fusion or sv
|
|
12
12
|
|
|
13
13
|
export function parseheader(line, flag) {
|
|
14
|
-
const header = line.toLowerCase().split(
|
|
15
|
-
if (header.length <= 1) return
|
|
14
|
+
const header = line.toLowerCase().split("\t")
|
|
15
|
+
if (header.length <= 1) return "invalid file header for svjson"
|
|
16
16
|
const htry = (...lst) => {
|
|
17
17
|
for (const a of lst) {
|
|
18
18
|
const j = header.indexOf(a)
|
|
@@ -20,21 +20,21 @@ export function parseheader(line, flag) {
|
|
|
20
20
|
}
|
|
21
21
|
return -1
|
|
22
22
|
}
|
|
23
|
-
let i = htry(
|
|
24
|
-
if (i != -1) header[i] =
|
|
25
|
-
i = htry(
|
|
26
|
-
if (i != -1) header[i] =
|
|
27
|
-
i = htry(
|
|
28
|
-
if (i != -1) header[i] =
|
|
29
|
-
i = htry(
|
|
30
|
-
if (i == -1) return [
|
|
31
|
-
header[i] =
|
|
23
|
+
let i = htry("sample")
|
|
24
|
+
if (i != -1) header[i] = "sample"
|
|
25
|
+
i = htry("sampletype")
|
|
26
|
+
if (i != -1) header[i] = "sampletype"
|
|
27
|
+
i = htry("patient")
|
|
28
|
+
if (i != -1) header[i] = "patient"
|
|
29
|
+
i = htry("json", "jsontext")
|
|
30
|
+
if (i == -1) return ["json missing from header"]
|
|
31
|
+
header[i] = "jsontext"
|
|
32
32
|
return [null, header]
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
export function parseline(i, line, flag, header) {
|
|
36
|
-
if (line ==
|
|
37
|
-
const lst = line.split(
|
|
36
|
+
if (line == "" || line[0] == "#") return
|
|
37
|
+
const lst = line.split("\t")
|
|
38
38
|
const m = {}
|
|
39
39
|
const badlines = flag.svjson.badlines
|
|
40
40
|
|
|
@@ -42,7 +42,7 @@ export function parseline(i, line, flag, header) {
|
|
|
42
42
|
m[header[j]] = lst[j]
|
|
43
43
|
}
|
|
44
44
|
if (!m.jsontext) {
|
|
45
|
-
badlines.push([i,
|
|
45
|
+
badlines.push([i, "missing jsontext", lst])
|
|
46
46
|
return
|
|
47
47
|
}
|
|
48
48
|
if (bulk.parsesample(m, flag, i, lst, badlines)) {
|
|
@@ -52,7 +52,7 @@ export function parseline(i, line, flag, header) {
|
|
|
52
52
|
try {
|
|
53
53
|
json = JSON.parse(m.jsontext)
|
|
54
54
|
} catch (e) {
|
|
55
|
-
badlines.push([i,
|
|
55
|
+
badlines.push([i, "invalid JSON text", lst])
|
|
56
56
|
return
|
|
57
57
|
}
|
|
58
58
|
// duplicating logic in pediatric.js
|
|
@@ -65,10 +65,10 @@ export function parseline(i, line, flag, header) {
|
|
|
65
65
|
dt: common.dtfusionrna,
|
|
66
66
|
class: common.mclassfusionrna,
|
|
67
67
|
isoform: pair.a.isoform,
|
|
68
|
-
mname: pair.b.name
|
|
68
|
+
mname: pair.b.name,
|
|
69
69
|
}
|
|
70
70
|
for (const k in m) {
|
|
71
|
-
if (k !=
|
|
71
|
+
if (k != "jsontext") m2[k] = m[k]
|
|
72
72
|
}
|
|
73
73
|
m2.pairlst = duplicate(json)
|
|
74
74
|
const n = pair.a.name.toUpperCase()
|
|
@@ -83,10 +83,10 @@ export function parseline(i, line, flag, header) {
|
|
|
83
83
|
dt: common.dtfusionrna,
|
|
84
84
|
class: common.mclassfusionrna,
|
|
85
85
|
isoform: pair.b.isoform,
|
|
86
|
-
mname: pair.a.name
|
|
86
|
+
mname: pair.a.name,
|
|
87
87
|
}
|
|
88
88
|
for (const k in m) {
|
|
89
|
-
if (k !=
|
|
89
|
+
if (k != "jsontext") m2[k] = m[k]
|
|
90
90
|
}
|
|
91
91
|
m2.pairlst = duplicate(json)
|
|
92
92
|
const n = pair.b.name.toUpperCase()
|
|
@@ -102,40 +102,42 @@ export function parseline(i, line, flag, header) {
|
|
|
102
102
|
switch (json.dt) {
|
|
103
103
|
case common.dtitd:
|
|
104
104
|
json.class = common.mclassitd
|
|
105
|
-
json.mname =
|
|
105
|
+
json.mname = "ITD"
|
|
106
106
|
break
|
|
107
107
|
case common.dtnloss:
|
|
108
108
|
json.class = common.mclassnloss
|
|
109
|
-
json.mname =
|
|
109
|
+
json.mname = "N-loss"
|
|
110
110
|
break
|
|
111
111
|
case common.dtcloss:
|
|
112
112
|
json.class = common.mclasscloss
|
|
113
|
-
json.mname =
|
|
113
|
+
json.mname = "C-loss"
|
|
114
114
|
break
|
|
115
115
|
case common.dtdel:
|
|
116
116
|
json.class = common.mclassdel
|
|
117
|
-
json.mname =
|
|
117
|
+
json.mname = "Del"
|
|
118
118
|
break
|
|
119
119
|
case common.dtsv:
|
|
120
120
|
json.class = common.mclasssv
|
|
121
|
-
json.mname =
|
|
121
|
+
json.mname = "SV"
|
|
122
122
|
break
|
|
123
123
|
default:
|
|
124
|
-
badlines.push([i,
|
|
124
|
+
badlines.push([i, "unknown datatype", lst])
|
|
125
125
|
return
|
|
126
126
|
}
|
|
127
127
|
// record only about a single gene
|
|
128
128
|
if (!json.gene) {
|
|
129
|
-
badlines.push([i,
|
|
129
|
+
badlines.push([i, "json.gene missing", lst])
|
|
130
130
|
return
|
|
131
131
|
}
|
|
132
132
|
flag.good++
|
|
133
133
|
for (const k in m) {
|
|
134
|
-
if (k !=
|
|
134
|
+
if (k != "jsontext") {
|
|
135
135
|
json[k] = m[k]
|
|
136
136
|
}
|
|
137
137
|
}
|
|
138
|
-
const n = flag.geneToUpper
|
|
138
|
+
const n = flag.geneToUpper
|
|
139
|
+
? json.gene.toUpperCase()
|
|
140
|
+
: json.gene.toUpperCase()
|
|
139
141
|
if (!flag.data[n]) {
|
|
140
142
|
flag.data[n] = []
|
|
141
143
|
}
|
|
@@ -148,7 +150,7 @@ function duplicate(lst) {
|
|
|
148
150
|
for (const pair of lst) {
|
|
149
151
|
const p = { a: {}, b: {} }
|
|
150
152
|
for (const k in pair) {
|
|
151
|
-
if (k !=
|
|
153
|
+
if (k != "a" && k != "b") p[k] = pair[k]
|
|
152
154
|
}
|
|
153
155
|
for (const k in pair.a) {
|
|
154
156
|
p.a[k] = pair.a[k]
|
package/src/bulk.trunc.js
CHANGED
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
//
|
|
5
5
|
////////////////////////////////////
|
|
6
6
|
|
|
7
|
-
import * as common from
|
|
8
|
-
import * as bulk from
|
|
7
|
+
import * as common from "./common.js"
|
|
8
|
+
import * as bulk from "./bulk.js"
|
|
9
9
|
|
|
10
10
|
export function parseheader(line, flag) {
|
|
11
|
-
const header = line.toLowerCase().split(
|
|
12
|
-
if (header.length <= 1) return
|
|
11
|
+
const header = line.toLowerCase().split("\t")
|
|
12
|
+
if (header.length <= 1) return "invalid header line for truncation"
|
|
13
13
|
const htry = (...lst) => {
|
|
14
14
|
for (const e of lst) {
|
|
15
15
|
const j = header.indexOf(e)
|
|
@@ -17,74 +17,80 @@ export function parseheader(line, flag) {
|
|
|
17
17
|
}
|
|
18
18
|
return -1
|
|
19
19
|
}
|
|
20
|
-
let i = htry(
|
|
21
|
-
if (i == -1) return
|
|
22
|
-
header[i] =
|
|
20
|
+
let i = htry("gene")
|
|
21
|
+
if (i == -1) return "gene missing from header"
|
|
22
|
+
header[i] = "gene"
|
|
23
23
|
i = htry(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
24
|
+
"annovar_isoform",
|
|
25
|
+
"mrna_accession",
|
|
26
|
+
"mrna accession",
|
|
27
|
+
"refseq_mrna_id",
|
|
28
|
+
"annovar_sj_filter_isoform",
|
|
29
|
+
"refseq",
|
|
30
|
+
"isoform"
|
|
31
31
|
)
|
|
32
|
-
if (i == -1) return
|
|
33
|
-
header[i] =
|
|
32
|
+
if (i == -1) return "isoform missing from header"
|
|
33
|
+
header[i] = "isoform"
|
|
34
34
|
let hasrnapos = false
|
|
35
|
-
i = htry(
|
|
35
|
+
i = htry("rnaposition")
|
|
36
36
|
if (i != -1) {
|
|
37
|
-
header[i] =
|
|
37
|
+
header[i] = "rnaposition"
|
|
38
38
|
hasrnapos = true
|
|
39
39
|
}
|
|
40
|
-
i = htry(
|
|
41
|
-
if (i == -1) return
|
|
42
|
-
header[i] =
|
|
40
|
+
i = htry("losstype")
|
|
41
|
+
if (i == -1) return "lossType missing from header"
|
|
42
|
+
header[i] = "losstype"
|
|
43
43
|
let hasgenomic = false
|
|
44
|
-
i = htry(
|
|
44
|
+
i = htry("chromosome", "chr")
|
|
45
45
|
if (i != -1) {
|
|
46
|
-
header[i] =
|
|
47
|
-
i = htry(
|
|
46
|
+
header[i] = "chr"
|
|
47
|
+
i = htry(
|
|
48
|
+
"start",
|
|
49
|
+
"start_position",
|
|
50
|
+
"wu_hg19_pos",
|
|
51
|
+
"chr_position",
|
|
52
|
+
"position"
|
|
53
|
+
)
|
|
48
54
|
if (i == -1) {
|
|
49
|
-
return
|
|
55
|
+
return "genomic position missing from header"
|
|
50
56
|
}
|
|
51
|
-
header[i] =
|
|
57
|
+
header[i] = "pos"
|
|
52
58
|
hasgenomic = true
|
|
53
59
|
}
|
|
54
60
|
if (!hasrnapos && !hasgenomic) {
|
|
55
|
-
return
|
|
61
|
+
return "neither rnaposition nor genomic position is given"
|
|
56
62
|
}
|
|
57
63
|
|
|
58
|
-
i = htry(
|
|
59
|
-
if (i != -1) header[i] =
|
|
60
|
-
i = htry(
|
|
61
|
-
if (i != -1) header[i] =
|
|
62
|
-
i = htry(
|
|
63
|
-
if (i != -1) header[i] =
|
|
64
|
-
i = htry(
|
|
65
|
-
if (i != -1) header[i] =
|
|
66
|
-
i = htry(
|
|
67
|
-
if (i != -1) header[i] =
|
|
64
|
+
i = htry("sample", "sample_name", "tumor_sample_barcode")
|
|
65
|
+
if (i != -1) header[i] = "sample"
|
|
66
|
+
i = htry("patient", "donor", "target_case_id")
|
|
67
|
+
if (i != -1) header[i] = "patient"
|
|
68
|
+
i = htry("disease")
|
|
69
|
+
if (i != -1) header[i] = "disease"
|
|
70
|
+
i = htry("origin")
|
|
71
|
+
if (i != -1) header[i] = "origin"
|
|
72
|
+
i = htry("sampletype", "sample type", "sample_type")
|
|
73
|
+
if (i != -1) header[i] = "sampletype"
|
|
68
74
|
flag.truncation.header = header
|
|
69
75
|
flag.truncation.loaded = true
|
|
70
76
|
return false
|
|
71
77
|
}
|
|
72
78
|
|
|
73
79
|
export function parseline(i, line, flag) {
|
|
74
|
-
if (line ==
|
|
75
|
-
const lst = line.split(
|
|
80
|
+
if (line == "" || line[0] == "#") return
|
|
81
|
+
const lst = line.split("\t")
|
|
76
82
|
const m = {}
|
|
77
83
|
for (let j = 0; j < flag.truncation.header.length; j++) {
|
|
78
84
|
m[flag.truncation.header[j]] = lst[j]
|
|
79
85
|
}
|
|
80
86
|
if (!m.gene) {
|
|
81
|
-
flag.truncation.badlines.push([i,
|
|
87
|
+
flag.truncation.badlines.push([i, "missing gene", lst])
|
|
82
88
|
return
|
|
83
89
|
}
|
|
84
90
|
if (m.rnaposition) {
|
|
85
91
|
const v = Number.parseInt(m.rnaposition)
|
|
86
92
|
if (Number.isNaN(v) || v < 0) {
|
|
87
|
-
flag.truncation.badlines.push([i,
|
|
93
|
+
flag.truncation.badlines.push([i, "invalid rnaPosition value", lst])
|
|
88
94
|
return
|
|
89
95
|
}
|
|
90
96
|
m.rnaposition = v
|
|
@@ -92,30 +98,30 @@ export function parseline(i, line, flag) {
|
|
|
92
98
|
if (m.pos) {
|
|
93
99
|
const v = Number.parseInt(m.pos)
|
|
94
100
|
if (Number.isNaN(v) || v < 0) {
|
|
95
|
-
flag.truncation.badlines.push([i,
|
|
101
|
+
flag.truncation.badlines.push([i, "invalid genomic position", lst])
|
|
96
102
|
return
|
|
97
103
|
}
|
|
98
104
|
m.pos = v
|
|
99
105
|
}
|
|
100
106
|
if (!m.losstype) {
|
|
101
|
-
flag.truncation.badlines.push([i,
|
|
107
|
+
flag.truncation.badlines.push([i, "missing lossType value", lst])
|
|
102
108
|
return
|
|
103
109
|
}
|
|
104
|
-
if (m.losstype !=
|
|
110
|
+
if (m.losstype != "n" && m.losstype != "c") {
|
|
105
111
|
flag.truncation.badlines.push([i, 'lossType value not "n" or "c"', lst])
|
|
106
112
|
return
|
|
107
113
|
}
|
|
108
114
|
if (bulk.parsesample(m, flag, i, lst, flag.truncation.badlines)) {
|
|
109
115
|
return
|
|
110
116
|
}
|
|
111
|
-
if (m.losstype ==
|
|
117
|
+
if (m.losstype == "n") {
|
|
112
118
|
m.dt = common.dtnloss
|
|
113
119
|
m.class = common.mclassnloss
|
|
114
|
-
m.mname =
|
|
120
|
+
m.mname = "N-loss"
|
|
115
121
|
} else {
|
|
116
122
|
m.dt = common.dtcloss
|
|
117
123
|
m.class = common.mclasscloss
|
|
118
|
-
m.mname =
|
|
124
|
+
m.mname = "C-loss"
|
|
119
125
|
}
|
|
120
126
|
flag.good++
|
|
121
127
|
const n = flag.geneToUpper ? m.gene.toUpperCase() : m.gene
|
package/src/clustering.js
CHANGED
|
@@ -1,34 +1,34 @@
|
|
|
1
1
|
export const clusterMethodLst = [
|
|
2
2
|
{
|
|
3
|
-
label:
|
|
4
|
-
value:
|
|
5
|
-
title: `Cluster by average value
|
|
3
|
+
label: "Average",
|
|
4
|
+
value: "average",
|
|
5
|
+
title: `Cluster by average value`,
|
|
6
6
|
},
|
|
7
7
|
{
|
|
8
8
|
label: `Complete`,
|
|
9
|
-
value:
|
|
10
|
-
title: `Use the complete clustering method
|
|
9
|
+
value: "complete",
|
|
10
|
+
title: `Use the complete clustering method`,
|
|
11
11
|
},
|
|
12
12
|
{
|
|
13
13
|
label: `Single`,
|
|
14
|
-
value:
|
|
15
|
-
title: `Use the single clustering method
|
|
14
|
+
value: "single",
|
|
15
|
+
title: `Use the single clustering method`,
|
|
16
16
|
},
|
|
17
17
|
{
|
|
18
18
|
label: `Ward.D`,
|
|
19
|
-
value:
|
|
20
|
-
title: `Use the ward.D clustering method
|
|
19
|
+
value: "ward.D",
|
|
20
|
+
title: `Use the ward.D clustering method`,
|
|
21
21
|
},
|
|
22
22
|
{
|
|
23
23
|
label: `Ward.D2`,
|
|
24
|
-
value:
|
|
25
|
-
title: `Use the ward.D2 clustering method
|
|
24
|
+
value: "ward.D2",
|
|
25
|
+
title: `Use the ward.D2 clustering method`,
|
|
26
26
|
},
|
|
27
27
|
{
|
|
28
28
|
label: `Mcquitty`,
|
|
29
|
-
value:
|
|
30
|
-
title: `Use the Mcquity clustering method
|
|
31
|
-
}
|
|
29
|
+
value: "mcquitty",
|
|
30
|
+
title: `Use the Mcquity clustering method`,
|
|
31
|
+
},
|
|
32
32
|
/* These methods are currently disabled because the dendrogram lines tend to cross one another.
|
|
33
33
|
{
|
|
34
34
|
label: `Centroid`,
|
|
@@ -44,23 +44,23 @@ export const clusterMethodLst = [
|
|
|
44
44
|
]
|
|
45
45
|
export const distanceMethodLst = [
|
|
46
46
|
{
|
|
47
|
-
label:
|
|
48
|
-
value:
|
|
49
|
-
title: `Calculate distance using euclidean method
|
|
47
|
+
label: "Euclidean",
|
|
48
|
+
value: "euclidean",
|
|
49
|
+
title: `Calculate distance using euclidean method`,
|
|
50
50
|
},
|
|
51
51
|
{
|
|
52
|
-
label:
|
|
53
|
-
value:
|
|
54
|
-
title: `Maximum distance between two components of x and y
|
|
52
|
+
label: "Maximum",
|
|
53
|
+
value: "maximum",
|
|
54
|
+
title: `Maximum distance between two components of x and y`,
|
|
55
55
|
},
|
|
56
56
|
{
|
|
57
|
-
label:
|
|
58
|
-
value:
|
|
59
|
-
title: `Calculate distance using the absolute distance between the two vectors
|
|
57
|
+
label: "Manhattan",
|
|
58
|
+
value: "manhattan",
|
|
59
|
+
title: `Calculate distance using the absolute distance between the two vectors`,
|
|
60
60
|
},
|
|
61
61
|
{
|
|
62
|
-
label:
|
|
63
|
-
value:
|
|
64
|
-
title: `Calculate distance using Canberra method
|
|
65
|
-
}
|
|
62
|
+
label: "Canberra",
|
|
63
|
+
value: "canberra",
|
|
64
|
+
title: `Calculate distance using Canberra method`,
|
|
65
|
+
},
|
|
66
66
|
]
|