@sjcrh/proteinpaint-server 2.44.0 → 2.46.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dataset/clinvar.hg19.js +53 -52
  2. package/dataset/clinvar.hg38.js +74 -73
  3. package/dataset/clinvar.js +164 -47
  4. package/dataset/termdb.test.js +257 -0
  5. package/genome/CriGri.js +1859 -27
  6. package/genome/cgc.js +743 -7
  7. package/genome/danRer10.js +1108 -46
  8. package/genome/dm3.js +71 -44
  9. package/genome/dm6.js +1926 -45
  10. package/genome/galGal5.js +23522 -46
  11. package/genome/galGal6.js +512 -46
  12. package/genome/hg19.js +293 -198
  13. package/genome/hg38.js +472 -105
  14. package/genome/hg38.test.js +406 -40
  15. package/genome/hgvirus.js +45 -20
  16. package/genome/mm10.js +135 -67
  17. package/genome/mm9.js +116 -79
  18. package/genome/rn6.js +1002 -47
  19. package/package.json +31 -35
  20. package/routes/_template_.js +30 -0
  21. package/routes/burden.js +149 -0
  22. package/routes/dataset.js +266 -0
  23. package/routes/dsdata.js +127 -0
  24. package/routes/gdc.maf.js +120 -0
  25. package/routes/gdc.mafBuild.js +106 -0
  26. package/routes/gdc.topMutatedGenes.js +465 -0
  27. package/routes/gene2canonicalisoform.js +41 -0
  28. package/routes/genelookup.js +52 -0
  29. package/routes/genomes.js +144 -0
  30. package/routes/healthcheck.js +30 -0
  31. package/routes/hicdata.js +98 -0
  32. package/routes/hicstat.js +55 -0
  33. package/routes/isoformlst.js +57 -0
  34. package/routes/ntseq.js +43 -0
  35. package/routes/pdomain.js +61 -0
  36. package/routes/snp.js +107 -0
  37. package/routes/termdb.categories.js +209 -0
  38. package/routes/termdb.cluster.js +228 -0
  39. package/routes/termdb.cohort.summary.js +38 -0
  40. package/routes/termdb.cohorts.js +49 -0
  41. package/routes/termdb.config.js +202 -0
  42. package/routes/termdb.getdescrstats.js +102 -0
  43. package/routes/termdb.getnumericcategories.js +92 -0
  44. package/routes/termdb.getpercentile.js +108 -0
  45. package/routes/termdb.getrootterm.js +65 -0
  46. package/routes/termdb.gettermchildren.js +67 -0
  47. package/routes/termdb.singleSampleMutation.js +80 -0
  48. package/routes/termdb.singlecellData.js +46 -0
  49. package/routes/termdb.singlecellSamples.js +160 -0
  50. package/routes/termdb.termsbyids.js +59 -0
  51. package/routes/termdb.topVariablyExpressedGenes.js +171 -0
  52. package/routes/termdb.violin.js +77 -0
  53. package/src/app.js +41500 -0
  54. package/src/serverconfig.js +14 -8
  55. package/start.js +3 -3
  56. package/routes/README.md +0 -84
  57. package/routes/burden.ts +0 -143
  58. package/routes/gdc.maf.ts +0 -195
  59. package/routes/gdc.mafBuild.ts +0 -114
  60. package/routes/gdc.topMutatedGenes.ts +0 -586
  61. package/routes/genelookup.ts +0 -50
  62. package/routes/healthcheck.ts +0 -29
  63. package/routes/hicdata.ts +0 -111
  64. package/routes/hicstat.ts +0 -55
  65. package/routes/termdb.categories.ts +0 -245
  66. package/routes/termdb.cluster.ts +0 -248
  67. package/routes/termdb.getdescrstats.ts +0 -102
  68. package/routes/termdb.getnumericcategories.ts +0 -99
  69. package/routes/termdb.getpercentile.ts +0 -118
  70. package/routes/termdb.getrootterm.ts +0 -73
  71. package/routes/termdb.gettermchildren.ts +0 -82
  72. package/routes/termdb.singleSampleMutation.ts +0 -87
  73. package/routes/termdb.singlecellData.ts +0 -49
  74. package/routes/termdb.singlecellSamples.ts +0 -175
  75. package/routes/termdb.termsbyids.ts +0 -63
  76. package/routes/termdb.topVariablyExpressedGenes.ts +0 -214
  77. package/routes/termdb.violin.ts +0 -77
  78. package/server.js +0 -2
  79. package/server.js.map +0 -1
  80. package/shared/common.js +0 -1080
  81. package/shared/termdb.initbinconfig.js +0 -96
  82. package/shared/vcf.js +0 -629
package/shared/common.js DELETED
@@ -1,1080 +0,0 @@
1
- /*
2
- shared between client and server
3
-
4
- exported functions
5
- - bplen()
6
- - mclasstester()
7
- - basecompliment()
8
-
9
-
10
- */
11
- import { rgb } from 'd3-color'
12
- import * as d3scale from 'd3-scale'
13
- import * as d3 from 'd3'
14
-
15
- export const defaultcolor = rgb('#8AB1D4').darker()
16
- export const default_text_color = rgb('#aaa').darker().darker()
17
-
18
- export const exoncolor = '#4F8053'
19
-
20
- // something that has something to do with coding gene reading frame
21
- export const IN_frame = true
22
- export const OUT_frame = false
23
-
24
- export const dtsnvindel = 1
25
- export const dtfusionrna = 2
26
- export const dtgeneexpression = 3
27
- export const dtcnv = 4
28
- export const dtsv = 5
29
- export const dtitd = 6
30
- export const dtdel = 7
31
- export const dtnloss = 8
32
- export const dtcloss = 9
33
- export const dtloh = 10 // to be used in svcnv track
34
- export const plotColor = '#ce768e'
35
-
36
- export const dt2label = {
37
- [dtsnvindel]: 'SNV/indel',
38
- [dtfusionrna]: 'Fusion RNA',
39
- [dtcnv]: 'CNV',
40
- [dtsv]: 'SV',
41
- [dtitd]: 'ITD',
42
- [dtdel]: 'Deletion',
43
- [dtnloss]: 'N-loss',
44
- [dtcloss]: 'C-loss',
45
- [dtloh]: 'LOH',
46
- [dtgeneexpression]: 'Gene Expression'
47
- }
48
-
49
- export const mclass = {
50
- M: {
51
- label: 'MISSENSE',
52
- color: '#3987CC',
53
- dt: dtsnvindel,
54
- desc: 'A substitution variant in the coding region resulting in altered protein coding.',
55
- key: 'M'
56
- },
57
- E: { label: 'EXON', color: '#bcbd22', dt: dtsnvindel, desc: 'A variant in the exon of a non-coding RNA.', key: 'E' },
58
- F: {
59
- label: 'FRAMESHIFT',
60
- color: '#db3d3d',
61
- dt: dtsnvindel,
62
- desc: 'An insertion or deletion variant that alters the protein coding frame.',
63
- key: 'F'
64
- },
65
- N: {
66
- label: 'NONSENSE',
67
- color: '#ff7f0e',
68
- dt: dtsnvindel,
69
- desc: 'A variant altering protein coding to produce a premature stopgain or stoploss.',
70
- key: 'N'
71
- },
72
- S: {
73
- label: 'SILENT',
74
- color: '#2ca02c',
75
- dt: dtsnvindel,
76
- desc: 'A substitution variant in the coding region that does not alter protein coding.',
77
- key: 'S'
78
- },
79
- D: {
80
- label: 'PROTEINDEL',
81
- color: '#7f7f7f',
82
- dt: dtsnvindel,
83
- desc: 'A deletion resulting in a loss of one or more codons from the product, but not altering the protein coding frame.',
84
- key: 'D'
85
- },
86
- I: {
87
- label: 'PROTEININS',
88
- color: '#8c564b',
89
- dt: dtsnvindel,
90
- desc: 'An insertion introducing one or more codons into the product, but not altering the protein coding frame.',
91
- key: 'I'
92
- },
93
- P: {
94
- label: 'SPLICE_REGION',
95
- color: '#9467bd',
96
- dt: dtsnvindel,
97
- desc: 'A variant in an intron within 10 nt of an exon boundary.',
98
- key: 'P'
99
- },
100
- L: {
101
- label: 'SPLICE',
102
- color: '#6633FF',
103
- dt: dtsnvindel,
104
- desc: 'A variant near an exon edge that may affect splicing functionality.',
105
- key: 'L'
106
- },
107
- Intron: { label: 'INTRON', color: '#656565', dt: dtsnvindel, desc: 'An intronic variant.', key: 'Intron' },
108
-
109
- // quick fix!! for showing genes that are not tested in samples (e.g. gene panels) in the heatmap
110
- Blank: { label: 'Not tested', color: '#fff', dt: dtsnvindel, desc: 'This gene is not tested.', key: 'Blank' },
111
-
112
- WT: { label: 'Wildtype', color: '#D3D3D3', dt: dtsnvindel, desc: 'Wildtype', key: 'WT' }
113
- }
114
- export const mclassitd = 'ITD'
115
- mclass[mclassitd] = {
116
- label: 'ITD',
117
- color: '#ff70ff',
118
- dt: dtitd,
119
- desc: 'In-frame internal tandem duplication.',
120
- key: mclassitd
121
- }
122
-
123
- export const mclassdel = 'DEL'
124
- mclass[mclassdel] = {
125
- label: 'DELETION, intragenic',
126
- color: '#858585',
127
- dt: dtdel,
128
- desc: 'Intragenic deletion.',
129
- key: mclassdel
130
- }
131
-
132
- export const mclassnloss = 'NLOSS'
133
- mclass[mclassnloss] = {
134
- label: 'N-terminus loss',
135
- color: '#545454',
136
- dt: dtnloss,
137
- desc: 'N-terminus loss due to translocation',
138
- key: mclassnloss
139
- }
140
-
141
- export const mclasscloss = 'CLOSS'
142
- mclass[mclasscloss] = {
143
- label: 'C-terminus loss',
144
- color: '#545454',
145
- dt: dtcloss,
146
- desc: 'C-terminus loss due to translocation',
147
- key: mclasscloss
148
- }
149
-
150
- export const mclassutr3 = 'Utr3'
151
- mclass[mclassutr3] = {
152
- label: 'UTR_3',
153
- color: '#998199',
154
- dt: dtsnvindel,
155
- desc: "A variant in the 3' untranslated region.",
156
- key: mclassutr3
157
- }
158
-
159
- export const mclassutr5 = 'Utr5'
160
- mclass[mclassutr5] = {
161
- label: 'UTR_5',
162
- color: '#819981',
163
- dt: dtsnvindel,
164
- desc: "A variant in the 5' untranslated region.",
165
- key: mclassutr5
166
- }
167
-
168
- export const mclassnonstandard = 'X'
169
- mclass[mclassnonstandard] = {
170
- label: 'NONSTANDARD',
171
- color: 'black',
172
- dt: dtsnvindel,
173
- desc: 'A mutation class that either does not match our notation, or is unspecified.',
174
- key: mclassnonstandard
175
- }
176
-
177
- export const mclassnoncoding = 'noncoding'
178
- mclass[mclassnoncoding] = {
179
- label: 'NONCODING',
180
- color: 'black',
181
- dt: dtsnvindel,
182
- desc: 'Noncoding mutation.',
183
- key: mclassnoncoding
184
- }
185
- // done point mutations
186
-
187
- export function mclasstester(s) {
188
- switch (s.toLowerCase()) {
189
- case 'missense_mutation':
190
- return 'M'
191
- case 'nonsense_mutation':
192
- return 'N'
193
- case 'splice_site':
194
- return 'L'
195
- case 'rna':
196
- return mclassnoncoding
197
- case 'frame_shift_del':
198
- return 'F'
199
- case 'frame_shift_ins':
200
- return 'F'
201
- case 'in_frame_del':
202
- return 'D'
203
- case 'in_frame_ins':
204
- return 'I'
205
- case 'translation_start_site':
206
- return mclassnonstandard
207
- case 'nonstop_mutation':
208
- return 'N'
209
- case "3'utr":
210
- return mclassutr3
211
- case "3'flank":
212
- return mclassnoncoding
213
- case "5'utr":
214
- return mclassutr5
215
- case "5'flank":
216
- return mclassnoncoding
217
- case 'blank':
218
- return 'Blank'
219
- default:
220
- return null
221
- }
222
- }
223
-
224
- export const mclassfusionrna = 'Fuserna'
225
- mclass[mclassfusionrna] = {
226
- label: 'Fusion transcript',
227
- color: '#545454',
228
- dt: dtfusionrna,
229
- desc:
230
- 'Marks the break points leading to fusion transcripts.<br>' +
231
- '<span style="font-size:150%">&#9680;</span> - 3\' end of the break point is fused to the 5\' end of another break point in a different gene.<br>' +
232
- '<span style="font-size:150%">&#9681;</span> - 5\' end of the break point is fused to the 3\' end of another break point in a different gene.',
233
- key: mclassfusionrna
234
- }
235
- export const mclasssv = 'SV'
236
- mclass[mclasssv] = {
237
- label: 'Structural variation',
238
- color: '#858585',
239
- dt: dtsv,
240
- desc: 'Structural variation detected in genomic DNA.',
241
- key: mclasssv
242
- }
243
-
244
- export const mclasscnvgain = 'CNV_amp'
245
- mclass[mclasscnvgain] = {
246
- label: 'Copy number gain',
247
- color: '#e9a3c9',
248
- dt: dtcnv,
249
- desc: 'Copy number gain',
250
- key: mclasscnvgain
251
- }
252
-
253
- export const mclasscnvloss = 'CNV_loss'
254
- mclass[mclasscnvloss] = {
255
- label: 'Copy number loss',
256
- color: '#a1d76a',
257
- dt: dtcnv,
258
- desc: 'Copy number loss',
259
- key: mclasscnvloss
260
- }
261
-
262
- export const mclasscnvloh = 'CNV_loh'
263
- mclass[mclasscnvloh] = { label: 'LOH', color: '#12EDFC', dt: dtcnv, desc: 'Loss of heterozygosity', key: mclasscnvloh }
264
-
265
- // for VCF
266
- export const mclasssnv = 'snv'
267
- mclass[mclasssnv] = {
268
- label: 'SNV',
269
- color: '#92a2d4',
270
- dt: dtsnvindel,
271
- desc: 'Single nucleotide variation',
272
- key: mclasssnv
273
- }
274
-
275
- export const mclassmnv = 'mnv'
276
- mclass[mclassmnv] = {
277
- label: 'MNV',
278
- color: '#92a2d4',
279
- dt: dtsnvindel,
280
- desc: 'Multiple nucleotide variation',
281
- key: mclassmnv
282
- }
283
-
284
- export const mclassinsertion = 'insertion'
285
- mclass[mclassinsertion] = {
286
- label: 'Sequence insertion',
287
- color: '#bd8e91',
288
- dt: dtsnvindel,
289
- desc: 'Sequence insertion',
290
- key: mclassinsertion
291
- }
292
-
293
- export const mclassdeletion = 'deletion'
294
- mclass[mclassdeletion] = {
295
- label: 'Sequence deletion',
296
- color: '#b5a174',
297
- dt: dtsnvindel,
298
- desc: 'Sequence deletion',
299
- key: mclassdeletion
300
- }
301
- // TODO complex indel
302
-
303
- export const dt2color = {
304
- [dtsnvindel]: mclass.M.color // general color for snvindel irrespective of class (when class is not available)
305
- // add new dt as needed
306
- }
307
-
308
- // option to override mutation class attribute values
309
- export function applyOverrides(overrides = {}) {
310
- if (overrides.mclass) {
311
- for (const key in overrides.mclass) {
312
- // allow to fill-in mutation class that are missing from mclass;
313
- // may be useful for things like 'Not tested', etc, that may not be in mclass by default
314
- // but are used by a customer with its own PP server instance
315
- if (!mclass[key]) mclass[key] = {}
316
- for (const subkey in overrides.mclass[key]) {
317
- mclass[key][subkey] = overrides.mclass[key][subkey]
318
- }
319
- }
320
- }
321
- }
322
-
323
- export const vepinfo = function (s) {
324
- const l = s.toLowerCase().split(',')
325
- let rank = 1
326
- if (l.indexOf('transcript_ablation') != -1) {
327
- // FIXME no class for whole gene deletion
328
- return [dtdel, mclassdel, rank]
329
- }
330
- rank++
331
- if (l.indexOf('splice_acceptor_variant') != -1) return [dtsnvindel, 'L', rank]
332
- rank++
333
- if (l.indexOf('splice_donor_variant') != -1) return [dtsnvindel, 'L', rank]
334
- rank++
335
- if (l.indexOf('stop_gained') != -1) return [dtsnvindel, 'N', rank]
336
- rank++
337
- if (l.indexOf('frameshift_variant') != -1) return [dtsnvindel, 'F', rank]
338
- rank++
339
- if (l.indexOf('stop_lost') != -1) return [dtsnvindel, 'N', rank]
340
- rank++
341
- if (l.indexOf('start_lost') != -1) return [dtsnvindel, 'N', rank]
342
- rank++
343
- if (l.indexOf('transcript_amplification') != -1) {
344
- // FIXME no class for whole gene amp
345
- return [dtsnvindel, mclassnonstandard, rank]
346
- }
347
- rank++
348
- if (
349
- l.indexOf('inframe_insertion') != -1 ||
350
- l.indexOf('conservative_inframe_insertion') != -1 ||
351
- l.indexOf('disruptive_inframe_insertion') != -1
352
- )
353
- return [dtsnvindel, 'I', rank]
354
- rank++
355
- if (
356
- l.indexOf('inframe_deletion') != -1 ||
357
- l.indexOf('conservative_inframe_deletion') != -1 ||
358
- l.indexOf('disruptive_inframe_deletion') != -1
359
- )
360
- return [dtsnvindel, 'D', rank]
361
- rank++
362
- if (l.indexOf('missense_variant') != -1) return [dtsnvindel, 'M', rank]
363
- rank++
364
- if (l.indexOf('protein_altering_variant') != -1) return [dtsnvindel, 'N', rank]
365
- rank++
366
- if (l.indexOf('splice_region_variant') != -1) return [dtsnvindel, 'P', rank]
367
- rank++
368
- if (l.indexOf('incomplete_terminal_codon_variant') != -1) return [dtsnvindel, 'N', rank]
369
- rank++
370
- if (l.indexOf('stop_retained_variant') != -1) return [dtsnvindel, 'S', rank]
371
- rank++
372
- if (l.indexOf('synonymous_variant') != -1) return [dtsnvindel, 'S', rank]
373
- rank++
374
- if (l.indexOf('coding_sequence_variant') != -1) return [dtsnvindel, mclassnonstandard, rank]
375
- rank++
376
- if (l.indexOf('mature_mirna_variant') != -1) return [dtsnvindel, 'E', rank]
377
- rank++
378
- if (l.indexOf('5_prime_utr_variant') != -1) return [dtsnvindel, mclassutr5, rank]
379
- rank++
380
- if (l.indexOf('3_prime_utr_variant') != -1) return [dtsnvindel, mclassutr3, rank]
381
- rank++
382
- if (l.indexOf('non_coding_transcript_exon_variant') != -1) return [dtsnvindel, 'E', rank]
383
- rank++
384
- if (l.indexOf('intron_variant') != -1) return [dtsnvindel, 'Intron', rank]
385
- rank++
386
- if (l.indexOf('nmd_transcript_variant') != -1) return [dtsnvindel, 'S', rank]
387
- rank++
388
- if (l.indexOf('non_coding_transcript_variant') != -1) return [dtsnvindel, 'E', rank]
389
- rank++
390
- if (l.indexOf('upstream_gene_variant') != -1) return [dtsnvindel, mclassnoncoding, rank]
391
- rank++
392
- if (l.indexOf('downstream_gene_variant') != -1) return [dtsnvindel, mclassnoncoding, rank]
393
- rank++
394
- if (l.indexOf('tfbs_ablation') != -1) return [dtsnvindel, mclassnoncoding, rank]
395
- rank++
396
- if (l.indexOf('tfbs_amplification') != -1) return [dtsnvindel, mclassnoncoding, rank]
397
- rank++
398
- if (l.indexOf('tf_binding_site_variant') != -1) return [dtsnvindel, mclassnoncoding, rank]
399
- rank++
400
- if (l.indexOf('regulatory_region_ablation') != -1) return [dtsnvindel, mclassnoncoding, rank]
401
- rank++
402
- if (l.indexOf('regulatory_region_amplification') != -1) return [dtsnvindel, mclassnoncoding, rank]
403
- rank++
404
- if (l.indexOf('feature_elongation') != -1) return [dtsnvindel, mclassnoncoding, rank]
405
- rank++
406
- if (l.indexOf('regulatory_region_variant') != -1) return [dtsnvindel, mclassnoncoding, rank]
407
- rank++
408
- if (l.indexOf('feature_truncation') != -1) return [dtsnvindel, mclassnoncoding, rank]
409
- rank++
410
- if (l.indexOf('intergenic_variant') != -1) return [dtsnvindel, mclassnoncoding, rank]
411
- rank++
412
- return [dtsnvindel, mclassnonstandard, rank]
413
- }
414
-
415
- // m orgin
416
- export const germlinelegend =
417
- '<circle cx="7" cy="12" r="7" fill="#b1b1b1"></circle><path d="M6.735557395310443e-16,-11A11,11 0 0,1 11,0L9,0A9,9 0 0,0 5.51091059616309e-16,-9Z" transform="translate(7,12)" fill="#858585" stroke="none"></path>'
418
-
419
- export const morigin = {}
420
-
421
- export const moriginsomatic = 'S'
422
- morigin[moriginsomatic] = {
423
- label: 'Somatic',
424
- desc: 'A variant found only in a tumor sample. The proportion is indicated by lack of any arc.',
425
- legend: '<circle cx="7" cy="12" r="7" fill="#b1b1b1"></circle>'
426
- }
427
- export const morigingermline = 'G'
428
- morigin[morigingermline] = {
429
- label: 'Germline',
430
- desc: 'A constitutional variant found in a normal sample. The proportion is indicated by the span of the solid arc within the whole circle.',
431
- legend: germlinelegend
432
- }
433
-
434
- morigin.germline = morigin[morigingermline]
435
- morigin.somatic = morigin[moriginsomatic]
436
-
437
- export const moriginrelapse = 'R'
438
- morigin[moriginrelapse] = {
439
- label: 'Relapse',
440
- desc: 'A somatic variant found only in a relapse sample. The proportion is indicated by the span of the hollow arc within the whole circle.',
441
- legend:
442
- '<circle cx="7" cy="12" r="7" fill="#b1b1b1"></circle><path d="M6.735557395310443e-16,-11A11,11 0 0,1 11,0L9,0A9,9 0 0,0 5.51091059616309e-16,-9Z" transform="translate(7,12)" fill="none" stroke="#858585"></path>'
443
- }
444
- export const morigingermlinepathogenic = 'GP'
445
- morigin[morigingermlinepathogenic] = {
446
- label: 'Germline pathogenic',
447
- desc: 'A constitutional variant with pathogenic allele.',
448
- legend: germlinelegend
449
- }
450
- export const morigingermlinenonpathogenic = 'GNP'
451
- morigin[morigingermlinenonpathogenic] = {
452
- label: 'Germline non-pathogenic',
453
- desc: 'A constitutional variant with non-pathogenic allele.',
454
- legend: germlinelegend,
455
- hidden: true
456
- }
457
-
458
- export const tkt = {
459
- usegm: 'usegm',
460
- ds: 'dataset',
461
- bigwig: 'bigwig',
462
- bigwigstranded: 'bigwigstranded',
463
- junction: 'junction',
464
- mdsjunction: 'mdsjunction',
465
- mdscnv: 'mdscnv',
466
- mdssvcnv: 'mdssvcnv', // no longer use as driver
467
- mdsexpressionrank: 'mdsexpressionrank',
468
- mdsvcf: 'mdsvcf', // for snv/indels, currently vcf, may include MAF
469
- //mdsgeneral:'mdsgeneral', // replaces mdssvcnv ****** not ready yet
470
- bedj: 'bedj',
471
- pgv: 'profilegenevalue',
472
- bampile: 'bampile',
473
- hicstraw: 'hicstraw',
474
- expressionrank: 'expressionrank',
475
- aicheck: 'aicheck',
476
- ase: 'ase',
477
- mds2: 'mds2', // mds 2nd gen
478
- mds3: 'mds3', // 3rd gen
479
- bedgraphdot: 'bedgraphdot',
480
- bam: 'bam',
481
- ld: 'ld'
482
- }
483
-
484
- export function validtkt(what) {
485
- for (const k in tkt) {
486
- if (what == tkt[k]) {
487
- return true
488
- }
489
- }
490
- return false
491
- }
492
-
493
- /*
494
- member track types from mdsvcf
495
- to get rid of hardcoded strings
496
- in future may include MAF format files
497
- */
498
- export const mdsvcftype = {
499
- vcf: 'vcf'
500
- }
501
-
502
- /*
503
- for custom mdssvcnv track
504
- or general track
505
- to avoid using hard-coded string
506
- */
507
- export const custommdstktype = {
508
- vcf: 'vcf',
509
- svcnvitd: 'svcnvitd',
510
- geneexpression: 'geneexpression'
511
- }
512
-
513
- // codons that are not here are stop codon!!
514
- export const codon = {
515
- GCT: 'A',
516
- GCC: 'A',
517
- GCA: 'A',
518
- GCG: 'A',
519
- CGT: 'R',
520
- CGC: 'R',
521
- CGA: 'R',
522
- CGG: 'R',
523
- AGA: 'R',
524
- AGG: 'R',
525
- AAT: 'N',
526
- AAC: 'N',
527
- GAT: 'D',
528
- GAC: 'D',
529
- TGT: 'C',
530
- TGC: 'C',
531
- CAA: 'Q',
532
- CAG: 'Q',
533
- GAA: 'E',
534
- GAG: 'E',
535
- GGT: 'G',
536
- GGC: 'G',
537
- GGA: 'G',
538
- GGG: 'G',
539
- CAT: 'H',
540
- CAC: 'H',
541
- ATT: 'I',
542
- ATC: 'I',
543
- ATA: 'I',
544
- TTA: 'L',
545
- TTG: 'L',
546
- CTT: 'L',
547
- CTC: 'L',
548
- CTA: 'L',
549
- CTG: 'L',
550
- AAA: 'K',
551
- AAG: 'K',
552
- ATG: 'M',
553
- TTT: 'F',
554
- TTC: 'F',
555
- CCT: 'P',
556
- CCC: 'P',
557
- CCA: 'P',
558
- CCG: 'P',
559
- TCT: 'S',
560
- TCC: 'S',
561
- TCA: 'S',
562
- TCG: 'S',
563
- AGT: 'S',
564
- AGC: 'S',
565
- ACT: 'T',
566
- ACC: 'T',
567
- ACA: 'T',
568
- ACG: 'T',
569
- TGG: 'W',
570
- TAT: 'Y',
571
- TAC: 'Y',
572
- GTT: 'V',
573
- GTC: 'V',
574
- GTA: 'V',
575
- GTG: 'V'
576
- }
577
-
578
- export const codon_stop = '*'
579
-
580
- export function nt2aa(gm) {
581
- // must convert genome seq to upper case!!!
582
- if (!gm.genomicseq) return undefined
583
- const enlst = []
584
- if (gm.coding) {
585
- for (const [i, e] of gm.coding.entries()) {
586
- const s = gm.genomicseq.substr(e[0] - gm.start, e[1] - e[0])
587
- if (gm.strand == '-') {
588
- enlst.push(reversecompliment(s))
589
- } else {
590
- enlst.push(s)
591
- }
592
- }
593
- }
594
- const nt = enlst.join('')
595
- const pep = []
596
-
597
- /*
598
- if startCodonFrame is set, will not begin translation from first nt, but will skip 1 or 2 nt at the beginning
599
- in case of IGKC, frame=1 means it will borrow 1 nt from the previous IGKJ exons
600
- so the first two nucleotides from the current exon will have to be skipped when translating IGKC alone
601
- */
602
- const startntidx = gm.startCodonFrame ? 3 - gm.startCodonFrame : 0
603
- for (let i = startntidx; i < nt.length; i += 3) {
604
- const a = codon[nt.substr(i, 3)]
605
- pep.push(a || codon_stop)
606
- }
607
- gm.cdseq = nt
608
- return pep.join('')
609
- }
610
-
611
- export function bplen(len, isfile) {
612
- // if "isfile" is true, to measure file size instead of basepair len
613
- if (len >= 1000000000) return (len / 1000000000).toFixed(1) + ' Gb'
614
- if (len >= 10000000) return Math.ceil(len / 1000000) + ' Mb'
615
- if (len >= 1000000) return (len / 1000000).toFixed(1) + ' Mb'
616
- if (len >= 10000) return Math.ceil(len / 1000) + ' Kb'
617
- if (len >= 1000) return (len / 1000).toFixed(1) + ' Kb'
618
- return len + (isfile ? 'bytes' : ' bp')
619
- }
620
-
621
- export const basecolor = {
622
- A: '#ca0020',
623
- T: '#f4a582',
624
- C: '#92c5de',
625
- G: '#0571b0'
626
- }
627
-
628
- export function basecompliment(nt) {
629
- switch (nt) {
630
- case 'A':
631
- return 'T'
632
- case 'T':
633
- return 'A'
634
- case 'C':
635
- return 'G'
636
- case 'G':
637
- return 'C'
638
- case 'a':
639
- return 't'
640
- case 't':
641
- return 'a'
642
- case 'c':
643
- return 'g'
644
- case 'g':
645
- return 'c'
646
- default:
647
- return nt
648
- }
649
- }
650
-
651
- export function reversecompliment(s) {
652
- const tmp = []
653
- for (let i = s.length - 1; i >= 0; i--) {
654
- tmp.push(basecompliment(s[i]))
655
- }
656
- return tmp.join('')
657
- }
658
-
659
- export function spliceeventchangegmexon(gm, evt) {
660
- /*
661
- alter gm.coding[], by exon-skip/alt events
662
- for frame checking
663
- gm must have coding
664
- */
665
- const gm2 = {
666
- chr: gm.chr,
667
- start: gm.start,
668
- stop: gm.stop,
669
- strand: gm.strand,
670
- coding: []
671
- }
672
- if (evt.isskipexon || evt.isaltexon) {
673
- for (let i = 0; i < gm.exon.length; i++) {
674
- const codingstart = Math.max(gm.codingstart, gm.exon[i][0])
675
- const codingstop = Math.min(gm.codingstop, gm.exon[i][1])
676
- if (codingstart > codingstop) {
677
- // not coding exon
678
- continue
679
- }
680
- if (evt.skippedexon.indexOf(i) == -1) {
681
- // not skipped
682
- gm2.coding.push([codingstart, codingstop])
683
- } else {
684
- // skipped
685
- }
686
- }
687
- } else if (evt.a5ss || evt.a3ss) {
688
- // still equal number of exons
689
- // adjust the affected exon first, then figure out coding[]
690
- const exons = gm.exon.map(e => [e[0], e[1]])
691
- const forward = gm.strand == '+'
692
- if (evt.a5ss) {
693
- if (forward) {
694
- exons[evt.exon5idx][1] = evt.junctionB.start
695
- } else {
696
- exons[evt.exon5idx + 1][0] = evt.junctionB.stop
697
- }
698
- } else {
699
- if (forward) {
700
- exons[evt.exon5idx + 1][0] = evt.junctionB.stop
701
- } else {
702
- exons[evt.exon5idx][1] = evt.junctionB.start
703
- }
704
- }
705
- // from new exons, figure out coding exons
706
- for (const e of exons) {
707
- const codingstart = Math.max(gm.codingstart, e[0])
708
- const codingstop = Math.min(gm.codingstop, e[1])
709
- if (codingstart > codingstop) {
710
- // not coding exon
711
- continue
712
- }
713
- gm2.coding.push([codingstart, codingstop])
714
- }
715
- }
716
- return gm2
717
- }
718
-
719
- export function fasta2gmframecheck(gm, str) {
720
- /*
721
- gm{}
722
- .chr
723
- .start
724
- .stop
725
- start/stop is transcript position
726
- .strand
727
- .coding[]
728
- str
729
- samtools faidx output
730
- */
731
- const lines = str.split('\n')
732
- // remove fasta header
733
- lines.shift()
734
- gm.genomicseq = lines.join('').toUpperCase()
735
-
736
- const aaseq = nt2aa(gm)
737
-
738
- let thisframe = OUT_frame
739
- const stopcodonidx = aaseq.indexOf(codon_stop)
740
- if (stopcodonidx == aaseq.length - 1) {
741
- // the first appearance of stop codon is at the last of translation
742
- thisframe = IN_frame
743
- }
744
- return thisframe
745
- }
746
-
747
- export function validate_vcfinfofilter(obj) {
748
- /*
749
- validate vcfinfofilter as from embedding api or dataset
750
- */
751
-
752
- if (!obj.lst) return '.lst missing'
753
-
754
- if (!Array.isArray(obj.lst)) return 'input is not an array'
755
-
756
- for (const set of obj.lst) {
757
- if (!set.name) return 'name missing from a set of .vcfinfofilter.lst'
758
-
759
- if (set.autocategory || set.categories) {
760
- // categorical info, auto or defined
761
-
762
- if (!set.autocategory) {
763
- for (const k in set.categories) {
764
- const v = set.categories[k]
765
- if (!set.autocolor && !v.color)
766
- return '.color missing for class ' + k + ' from .categories of set ' + set.name
767
- if (!v.label) {
768
- v.label = k
769
- }
770
- }
771
- }
772
-
773
- if (set.categoryhidden) {
774
- for (const k in set.categoryhidden) {
775
- if (!set.categories[k]) return 'unknown hidden-by-default category ' + k + ' from set ' + set.name
776
- }
777
- } else {
778
- set.categoryhidden = {}
779
- }
780
- } else if (set.numericfilter) {
781
- // otherwise, numerical value, the style of population frequency filter
782
- const lst = []
783
- for (const v of set.numericfilter) {
784
- if (typeof v == 'number') {
785
- /*
786
- just a number, defaults to 'lower-than'
787
- */
788
- lst.push({ side: '<', value: v })
789
- } else {
790
- lst.push({
791
- side: v.side || '<',
792
- value: v.value
793
- })
794
- }
795
- }
796
- set.numericfilter = lst
797
-
798
- //return 'no .categories or .numericfilter from set '+set.name
799
- }
800
-
801
- if (set.altalleleinfo) {
802
- if (!set.altalleleinfo.key) {
803
- return '.key missing from .altalleleinfo from set ' + set.name
804
- }
805
- } else if (set.locusinfo) {
806
- if (!set.locusinfo.key) {
807
- return '.key missing from .locusinfo from set ' + set.name
808
- }
809
- } else {
810
- return 'neither .altalleleinfo or .locusinfo is available from set ' + set.name
811
- }
812
- }
813
- }
814
-
815
- export function contigNameNoChr(genome, chrlst) {
816
- /*
817
- FIXME hard-coded for human genome styled chromosome names
818
- */
819
- for (const n in genome.majorchr) {
820
- if (chrlst.indexOf(n.replace('chr', '')) != -1) {
821
- return true
822
- }
823
- }
824
- if (genome.minorchr) {
825
- for (const n in genome.minorchr) {
826
- if (chrlst.indexOf(n.replace('chr', '')) != -1) {
827
- return true
828
- }
829
- }
830
- }
831
- return false
832
- }
833
- export function contigNameNoChr2(genome, chrlst) {
834
- // returns number of matching chr names that either includes "chr" or not
835
- // for detecting if chrlst entirely mismatch with what's in the genome build
836
- // TODO replace contigNameNoChr
837
- let nochrcount = 0,
838
- haschrcount = 0
839
- for (const n in genome.majorchr) {
840
- if (chrlst.includes(n)) {
841
- haschrcount++
842
- } else if (chrlst.includes(n.replace('chr', ''))) {
843
- nochrcount++
844
- }
845
- }
846
- if (genome.minorchr) {
847
- for (const n in genome.minorchr) {
848
- if (chrlst.includes(n)) {
849
- haschrcount++
850
- } else if (chrlst.includes(n.replace('chr', ''))) {
851
- nochrcount++
852
- }
853
- }
854
- }
855
- return [nochrcount, haschrcount]
856
- }
857
-
858
- export function getMax_byiqr(lst, novaluemax) {
859
- /*
860
- lst: array of numbers
861
- novaluemax: when lst is empty, return this value
862
- cutoff value based on IQR to exclude outlier values
863
- */
864
- if (lst.length == 0) return novaluemax
865
- lst.sort((i, j) => i - j)
866
- const max = lst[lst.length - 1]
867
- if (lst.length <= 5) return max
868
- const q1 = lst[Math.floor(lst.length / 4)]
869
- const q2 = lst[Math.floor((lst.length * 3) / 4)]
870
- return Math.min(q2 + (q2 - q1) * 1.5, max)
871
- }
872
-
873
- export function alleleInGenotypeStr(genotype, allele) {
874
- if (!genotype) return false
875
- if (genotype.indexOf('/') != -1) {
876
- return genotype.split('/').indexOf(allele) != -1
877
- }
878
- return genotype.split('|').indexOf(allele) != -1
879
- }
880
-
881
- export const gmmode = {
882
- genomic: 'genomic',
883
- splicingrna: 'splicing RNA', // if just 1 exon, use "RNA" as label
884
- exononly: 'exon only',
885
- protein: 'protein',
886
- gmsum: 'aggregated exons'
887
- }
888
-
889
/*
helper for vcfcopymclass()
annotations: array of csq/ann elements, each may carry _csqrank
returns the element with the smallest _csqrank (first one wins on ties),
or undefined when the array is empty
*/
function pickLowestCsqrank(annotations) {
	let best = annotations[0]
	for (const q of annotations) {
		if (q._csqrank < best._csqrank) {
			best = q
		}
	}
	return best
}

/*
helper for vcfcopymclass()
copies gene/isoform/class/dt/mname from the chosen annotation onto m{}
*/
function copyAnnotationToVariant(m, annotation) {
	m.gene = annotation._gene
	m.isoform = annotation._isoform
	m.class = annotation._class
	m.dt = annotation._dt
	m.mname = annotation._mname

	if (m.class == mclassnoncoding) {
		// noncoding converted from an annotation is not meaningful: drab color, no mname label
		// delete so it is later converted to a non-protein class
		delete m.class
	}
}

/*
input:

m={}
	m.csq=[]
		element: {
			Allele: str,
			Consequence: str,
			CANONICAL: str, // true if _isoform is canonical
			...
			_isoform: str,
			_class: str,
			_csqrank: int
		}
	m.ann=[]
		annovar output. may be derelict
block={}
	block.usegm={ isoform }
	can be a mock object when running this function in node!

does:
	find an annotation from m.csq[] that's fitting the circumstance, tried in this order:
	- current gm isoform displayed in block gene mode
	- any canonical isoform from m.csq[] (can be missing if vep is not instructed to do it)
	- one with the lowest _csqrank value
	when m.csq is absent, m.ann[] is used instead:
	- with block.usegm: lowest _csqrank among annotations of that isoform,
	  falling back to m.ann[0] only when block.gmmode is genomic
	- without block.usegm: lowest _csqrank among all
	then, copy its gene/isoform/class/dt/mname to m{}
	has many fall-backs and always tries to assign class/mname;
	if class is still unset it is inferred from m.type
	m.type is always deleted at the end

no return
*/
export function vcfcopymclass(m, block) {
	if (m.csq) {
		let useone // point to the element of m.csq[], from this class/mname is copied to m{}

		if (block.usegm) {
			// block is in gm mode, find a csq matching with the genemodel isoform
			useone = m.csq.find(i => i._isoform == block.usegm.isoform)
		}
		if (!useone) {
			// no match to usegm isoform; can be due to in genomic mode and zoomed out,
			// where this variant is from a neighboring gene near block.usegm
			// find one using canonical isoform
			useone = m.csq.find(i => i.CANONICAL)
		}
		if (!useone) {
			// none of the elements in m.csq[] is using a canonical isoform, as that's a vep optional output
			// last method: choose *colorful* annotation based on _csqrank
			useone = pickLowestCsqrank(m.csq)
		}

		if (useone) copyAnnotationToVariant(m, useone)
	} else if (m.ann) {
		// there could be many applicable annotations, the first one not always desirable
		// choose *colorful* annotation based on _csqrank
		let useone
		if (block.usegm) {
			useone = pickLowestCsqrank(m.ann.filter(q => q._isoform == block.usegm.isoform))
			if (!useone && block.gmmode == gmmode.genomic) {
				// no match to this gene, but in genomic mode, maybe from other genes?
				useone = m.ann[0]
			}
		} else {
			useone = pickLowestCsqrank(m.ann)
		}

		if (useone) copyAnnotationToVariant(m, useone)
	}

	if (m.class == undefined) {
		// infer class from m.type, which was assigned by vcf.js
		if (mclass[m.type]) {
			m.class = m.type
			m.dt = mclass[m.type].dt
			m.mname = m.id && m.id != '.' ? m.id : m.ref + '>' + m.alt
			if (m.mname.length > 15) {
				// avoid long indel
				m.mname = m.type
			}
		} else {
			m.class = mclassnonstandard
			m.dt = dtsnvindel
			m.mname = m.type
		}
	}

	delete m.type
}
1016
-
1017
/*
label for items that are not annotated by an attribute
used in:
	mdssvcnv track, mutation attributes, showing such items in legend, and server-side filtering
*/
export const not_annotated = 'Unannotated'
1022
-
1023
// kernel density estimator as from https://www.d3-graph-gallery.com/graph/density_basic.html

/*
kernel: function taking a number (distance x - v) and returning a weight
X: array of positions at which the density is evaluated
returns a function that takes V (array of sample values) and returns
an array of [x, mean of kernel(x - v) over all v in V], one entry per x in X
*/
export function kernelDensityEstimator(kernel, X) {
	return function estimate(V) {
		const meanKernelAt = x => V.reduce((sum, v) => sum + kernel(x - v), 0) / V.length
		return X.map(x => [x, meanKernelAt(x)])
	}
}
1032
-
1033
/*
k: scaling factor (bandwidth) dividing the distance
returns a kernel function of distance v:
	with u = v / k, gives 0.75 * (1 - u*u) / k when |u| <= 1, otherwise 0
*/
export function kernelEpanechnikov(k) {
	return function (v) {
		const u = v / k
		if (Math.abs(u) <= 1) {
			return (0.75 * (1 - u * u)) / k
		}
		return 0
	}
}
1038
-
1039
// palette of 20 hex color strings, returned by getColorScheme() for 13 to 20 categories
// prettier-ignore
export const schemeCategory20 = [
	'#1f77b4', '#aec7e8',
	'#ff7f0e', '#ffbb78',
	'#2ca02c', '#98df8a',
	'#d62728', '#ff9896',
	'#9467bd', '#c5b0d5',
	'#8c564b', '#c49c94',
	'#e377c2', '#f7b6d2',
	'#7f7f7f', '#c7c7c7',
	'#bcbd22', '#dbdb8d',
	'#17becf', '#9edae5'
]
1061
// two-color palette, returned by getColorScheme() for 2 or fewer categories
export const schemeCategory2 = [
	'#e75480',
	'blue'
]
1062
-
1063
/*
number: how many categories need a color
returns an array of colors sized to fit:
	> 20: one color per category sampled from d3.interpolateRainbow
	> 12: schemeCategory20
	> 8:  d3.schemePaired
	> 2:  d3.schemeDark2
	otherwise schemeCategory2
*/
export function getColorScheme(number) {
	if (number > 20) {
		// too many for any fixed palette, spread evenly over the rainbow interpolator
		const rainbow = []
		let idx = 0
		while (idx < number) {
			rainbow.push(d3.interpolateRainbow(idx / number))
			idx++
		}
		return rainbow
	}
	if (number > 12) return schemeCategory20
	if (number > 8) return d3.schemePaired
	if (number > 2) return d3.schemeDark2
	return schemeCategory2
}
1074
/*
number: how many categories need a color
returns a d3 ordinal scale whose range is the palette chosen by getColorScheme(number)
*/
export function getColors(number) {
	return d3scale.scaleOrdinal(getColorScheme(number))
}
1078
-
1079
// single-letter mutation class codes grouped as truncating
// NOTE(review): presumably these are keys of mclass; confirm against its definition
export const truncatingMutations = ['F', 'N', 'D', 'I', 'L']
// protein-changing set: every truncating code plus 'M' and 'P'
export const proteinChangingMutations = [...truncatingMutations, 'M', 'P']