@sjcrh/proteinpaint-shared 2.78.0-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/urljson.ts ADDED
@@ -0,0 +1,85 @@
1
+ /*
2
+ A custom encoder-decoder for URL query parameter values
3
+
4
+ All values are strings except for the following, which
5
+ are assumed to be json-encoded:
6
+
7
+ - those that correspond to JSON reserved keywords: true, false, null
8
+ - numeric values using the isNumeric() function below
9
+ - values that are wrapped by "", {}, []
10
+
11
+ In addition, a URL-payload that includes an `encoding=urljson` parameter
12
+ will cause all query parameter values to be processed by the decode()
13
+ function below. This is not required, but may help remove ambiguity, especially
14
+ to distinguish urljson-encoded params vs legacy URL params that have
15
+ been manually coded in a way that doesn't conform to the expectations here.
16
+
17
+ Why not just encode every URL query parameter value as JSON?
18
+
19
+ - That makes the URL harder to read, since more encoding characters have to
20
+ be URI (percent) encoded. In contrast, since most values are strings or numbers,
21
+ the encoder below leaves those values alone for better readability of the URL. For
22
+ example, unambiguous strings would not have to be wrapped with double-quotes
23
+ that are then URI encoded.
24
+
25
+ - The decoder will always accept and correctly process values that are JSON-encoded.
26
+ So the encoding exceptions above do not prevent harder-to-read JSON-encoded string
27
+ values.
28
+ */
29
+
30
// a URL query parameters object with values to be encoded
export type UrlJsonRaw = {
	[key: string]: any //boolean | string | number | any[] | null | undefined //| { [key: string]: any }
}

// a URL query parameters object with values to be decoded
export type UrlJsonEncoded = {
	[key: string]: string
}

// string values that decode() would otherwise read as a JSON keyword;
// encode() JSON-quotes them so that they survive a round trip as strings
const reserved = ['false', 'true', 'null', 'undefined']
// leading characters that decode() treats as the start of a JSON value
const delimiters = ['"', '{', '[']

/**
 * Serialize an object into a URL query string (without a leading "?").
 *
 * Plain strings are only percent-encoded. Every other value, and any string
 * that decode() would otherwise read as a number, a JSON keyword or a
 * JSON-wrapped value, is JSON-stringified before being percent-encoded.
 *
 * Entries whose value is undefined, or that JSON cannot represent
 * (JSON.stringify() returns undefined, e.g. for functions), are omitted.
 * Keys are percent-encoded too, so that a key containing "&" or "="
 * cannot corrupt the query string.
 *
 * @param rawObject key-value pairs to encode
 * @returns "key=value" pairs joined by "&"
 */
export function encode(rawObject: UrlJsonRaw): string {
	const params: string[] = []
	for (const [key, value] of Object.entries(rawObject)) {
		if (value === undefined) continue
		// no need to json-encode a string before percent encoding
		// if it doesn't contain reserved JSON keywords/wrapper characters
		const isPlainString =
			typeof value == 'string' && !isNumeric(value) && !reserved.includes(value) && !delimiters.includes(value[0])
		const text = isPlainString ? value : JSON.stringify(value)
		// JSON.stringify() yields undefined for functions and symbols;
		// skip these instead of emitting the literal text "undefined"
		if (text === undefined) continue
		params.push(`${encodeURIComponent(key)}=${encodeURIComponent(text)}`)
	}
	return params.join('&')
}

/**
 * Decode, in place, the values of an already-parsed URL query object.
 *
 * A string value is converted when it
 *  - is numeric according to isNumeric(): replaced with Number(value), which
 *    equals JSON.parse(value) for valid JSON numbers and also accepts forms
 *    that JSON rejects, such as ".5", "01" or "+5"
 *  - equals null, true or false
 *  - is wrapped by "", {} or []
 *  - or when query.encoding == 'json', in which case every value is
 *    JSON-parsed, except the `encoding` marker itself
 * Any other string, and any non-string value, is left untouched.
 *
 * Note that although the parameter is typed as string-valued, the same
 * object is returned with converted (non-string) values.
 *
 * @param query query object, for example as produced by a URL parser
 * @returns the same `query` object, mutated
 * @throws SyntaxError when a value selected for JSON parsing is malformed
 */
export function decode(query: UrlJsonEncoded) {
	const forceJson = query.encoding == 'json'
	// converted values are not strings, so write through a loosely typed alias
	const decoded: UrlJsonRaw = query
	for (const [key, value] of Object.entries(query)) {
		// e.g. an array from a repeated query parameter: nothing to decode
		if (typeof value != 'string') continue
		// the marker itself is the bare word json, which is not valid JSON
		if (forceJson && key == 'encoding') continue

		if (isNumeric(value)) {
			// NEED TO check
			decoded[key] = Number(value)
		} else if (
			forceJson ||
			value == 'null' || // not new, always been
			value == 'true' || // NEED TO FIND-REPLACE CODE THAT USES value == 'true'
			value == 'false' || // NEED TO FIND-REPLACE CODE THAT USES value == 'false'
			isWrapped(value, '"', '"') ||
			isWrapped(value, '{', '}') ||
			isWrapped(value, '[', ']')
		) {
			decoded[key] = JSON.parse(value)
		}
		// else the value is already a string
	}
	return query
}

// true if value starts with `open`, ends with `close`, and is long enough
// for these to be two distinct characters (a lone `"` is not wrapped)
function isWrapped(value: string, open: string, close: string): boolean {
	return value.length > 1 && value.startsWith(open) && value.endsWith(close)
}

// true if d starts with a parseable number and converts as a whole to a
// finite number; the empty string is not numeric since parseFloat('') is NaN
function isNumeric(d: unknown): boolean {
	return !isNaN(parseFloat(String(d))) && isFinite(Number(d))
}
package/src/vcf.ann.js ADDED
@@ -0,0 +1,62 @@
1
+ import { vepinfo } from './common.js'
2
+
3
/*
parse the ANN (snpEff) value of a variant line, not header

str: the ANN value; annotations are separated by ",", fields by "|",
     and "&" inside a field is turned into ","
header: array of {name}, giving the field name at each position
m: object with .alleles[]; each annotation is appended to the .ann[] of
   the allele whose .allele equals the annotation's Allele field

returns null if header is missing, otherwise true
*/
export function parse_ANN(str, header, m) {
	// snpEff
	if (!header) return null

	for (const rawAnnotation of str.split(',')) {
		const fields = rawAnnotation.replace(/&/g, ',').split('|')

		// collect the non-empty fields under their header names
		const o = {}
		for (let idx = 0; idx < header.length; idx++) {
			const fieldValue = fields[idx]
			if (fieldValue) o[header[idx].name] = fieldValue
		}
		if (!o.Allele) continue

		let matched = null
		for (const candidate of m.alleles) {
			if (candidate.allele == o.Allele) {
				matched = candidate
				break
			}
		}
		// cannot match to allele!!!
		if (!matched) continue

		if (!matched.ann) matched.ann = []
		matched.ann.push(o)

		o._gene = o.Gene_Name

		// isoform, with the version suffix after "." removed
		if (o.Feature_Type == 'transcript' && o.Feature_ID) {
			o._isoform = o.Feature_ID.split('.')[0]
		}

		// class
		if (o.Annotation) {
			const classInfo = vepinfo(o.Annotation)
			o._dt = classInfo[0]
			o._class = classInfo[1]
			o._csqrank = classInfo[2]
		} else {
			// FIXME
			// NOTE(review): dtsnvindel and mclassnonstandard are not imported by the
			// visible import line; presumably they are exported by ./common.js - confirm
			// and import them, otherwise this branch throws a ReferenceError
			o._dt = dtsnvindel
			o._class = mclassnonstandard
		}

		// mname: protein-level HGVS is preferred over coding-level
		//o._mname=decodeURIComponent(o.HGVSp.substr(o.HGVSp.indexOf(':')+1))
		const mname = o['HGVS.p'] || o['HGVS.c']
		if (mname) o._mname = mname
	}
	return true
}
package/src/vcf.csq.js ADDED
@@ -0,0 +1,153 @@
1
+ import { vepinfo } from './common.js'
2
+ /*
3
+ parse csq field from a variant line, not header
4
+ CSQ header must have already been parsed
5
+
6
+ str: the csq value for a vcf line
7
+ header: [ // something like this
8
+ { name: 'Allele' },
9
+ { name: 'Consequence' },
10
+ { name: 'IMPACT' },
11
+ { name: 'SYMBOL' },
12
+ { name: 'Gene' },
13
+ { name: 'Feature_type' },
14
+ { name: 'Feature' },
15
+ { name: 'BIOTYPE' },
16
+ { name: 'EXON' },
17
+ { name: 'INTRON' },
18
+ { name: 'HGVSc' },
19
+ { name: 'HGVSp' },
20
+ { name: 'cDNA_position' },
21
+ { name: 'CDS_position' },
22
+ { name: 'Protein_position' },
23
+ { name: 'Amino_acids' },
24
+ { name: 'Codons' },
25
+ { name: 'Existing_variation' },
26
+ { name: 'DISTANCE' },
27
+ { name: 'STRAND' },
28
+ { name: 'FLAGS' },
29
+ { name: 'SYMBOL_SOURCE' },
30
+ { name: 'HGNC_ID' },
31
+ { name: 'CANONICAL' },
32
+ { name: 'REFSEQ_MATCH' },
33
+ { name: 'GIVEN_REF' },
34
+ { name: 'USED_REF' },
35
+ { name: 'BAM_EDIT' },
36
+ { name: 'HGVS_OFFSET' },
37
+ { name: 'CLIN_SIG' },
38
+ { name: 'SOMATIC' },
39
+ { name: 'PHENO' }
40
+ ]
41
+
42
+ m: {
43
+ mlst[ {} ]
44
+ .allele_original
45
+ .csq[ {} ] // parse_CSQ will add this array to this allele
46
+ ._class
47
+ ._csqrank
48
+ ._dt
49
+ ._gene
50
+ ._isoform
51
+ ._mname
52
+ }
53
+
54
+ */
55
+
56
/*
parse the CSQ value of a variant line and attach each annotation to the
allele it belongs to

str: the csq value; annotations are separated by ",", fields by "|",
     and "&" inside a field is turned into ","
header: array of {name}, giving the field name at each position
m: object with .mlst[] (or the older .alleles[]); a matched annotation is
   appended to the .csq[] of its allele. Without either array no annotation
   can be matched and nothing is attached.

An annotation is matched to an allele by, in order:
 - allele_original equal to the Allele field
 - Allele is "-" (deletion) and there is exactly one allele
 - allele_original without its first base equal to Allele (insertion)

returns null if header is missing, otherwise true
*/
export function parse_CSQ(str, header, m) {
	if (!header) {
		return null
	}

	//////////////////////////////////////
	// NOTE
	// mds2delete
	// m.alleles[] is based on old vcf parsing and may delete?
	// latest spec is m.mlst[]
	//////////////////////////////////////
	// fall back to an empty list so that a variant with neither array is
	// skipped rather than throwing on iteration
	const candidates = m.mlst || m.alleles || []

	for (const thisannotation of str.split(',')) {
		const lst = thisannotation.replace(/&/g, ',').split('|')

		const o = {}

		for (let i = 0; i < header.length; i++) {
			if (lst[i]) {
				o[header[i].name] = lst[i]
			}
		}
		if (!o.Allele) {
			continue
		}

		let allele = null
		for (const a of candidates) {
			if (a.allele_original == o.Allele) {
				allele = a
				break
			}
		}
		if (!allele) {
			if (o.Allele == '-') {
				// deletion
				if (candidates.length == 1) {
					allele = candidates[0]
				}
			} else {
				for (const a of candidates) {
					// guard: an allele lacking allele_original cannot be compared
					if (a.allele_original && a.allele_original.slice(1) == o.Allele) {
						// insertion, without first padding base
						allele = a
						break
					}
				}
			}
			if (!allele) {
				// cannot match to allele!!!
				continue
			}
		}
		if (!allele.csq) {
			allele.csq = []
		}
		allele.csq.push(o)

		// gene
		o._gene = o.SYMBOL || o.Gene

		// isoform
		if (o.Feature_type == 'Transcript' && o.Feature) {
			o._isoform = o.Feature.split('.')[0] // remove version
		} else {
			// not a transcript, or the transcript id is missing
			o._isoform = o._gene
		}

		// class
		if (o.Consequence) {
			const [dt, cls, rank] = vepinfo(o.Consequence)
			o._dt = dt
			o._class = cls
			o._csqrank = rank
		} else {
			// FIXME
			// NOTE(review): dtsnvindel and mclassnonstandard are not imported by the
			// visible import line; presumably they are exported by ./common.js - confirm
			// and import them, otherwise this branch throws a ReferenceError
			o._dt = dtsnvindel
			o._class = mclassnonstandard
		}

		// mname
		if (o.HGVSp) {
			// drop the part up to ":" and undo percent-encoding
			o._mname = decodeURIComponent(o.HGVSp.slice(o.HGVSp.indexOf(':') + 1))
		} else if (o.Protein_position && o.Amino_acids) {
			o._mname = decodeURIComponent(o.Protein_position + o.Amino_acids)
		} else if (o.HGVSc) {
			o._mname = o.HGVSc.slice(o.HGVSc.indexOf(':') + 1)
		} else if (o.Existing_variation) {
			// every other branch sets _mname; _name is still written for any
			// existing reader of that property
			o._mname = o.Existing_variation
			o._name = o.Existing_variation
		}
	}
	return true
}
@@ -0,0 +1,50 @@
1
+ /*
2
+ to parse a variant line, not header
3
+
4
+ cannot simply slice by /[;=]/, but read char by char
5
+ case CLNVI=Breast_Cancer_Information_Core__(BRCA2):745-4&base_change=C_to_G;
6
+ case k1=v1;DB;k2=v2;
7
+ */
8
/*
split the INFO value of a variant line into a key-to-value object

str: entries separated by ";". An entry containing "=" is a key=value pair,
     split at its first "=" so that the value may itself contain "=".
     An entry without "=" is a flag and gets the value 1.

Empty entries (a trailing ";", ";;", or an empty/missing str) are ignored
and do not create an empty-named flag.

returns an object mapping each key to its string value, or to 1 for a flag
*/
export function dissect_INFO(str) {
	const k2v = {}
	if (!str) return k2v

	let start = 0 // index where the current key or value begins
	let key = null // key of the pair whose value is being scanned
	let inValue = false // true after "=", until the next ";"

	for (let i = 0; i < str.length; i++) {
		const c = str[i]
		if (inValue) {
			// any "=" within a value is part of the value; only ";" ends it
			if (c == ';') {
				k2v[key] = str.substring(start, i)
				key = null
				inValue = false
				start = i + 1
			}
		} else if (c == '=') {
			key = str.substring(start, i)
			inValue = true
			start = i + 1
		} else if (c == ';') {
			// should be a flag
			const flag = str.substring(start, i)
			if (flag) k2v[flag] = 1
			start = i + 1
		}
	}

	// whatever follows the last ";" is the final value or the final flag
	const remainstr = str.substring(start)
	if (inValue) {
		k2v[key] = remainstr
	} else if (remainstr) {
		k2v[remainstr] = 1
	}

	return k2v
}