@sjcrh/proteinpaint-shared 2.186.0 → 2.188.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/README.md +10 -2
  2. package/constants/AiHisto.ts +27 -0
  3. package/constants/README.md +11 -0
  4. package/devTs.ts +3 -0
  5. package/dist/constants/AiHisto.d.ts +23 -0
  6. package/dist/constants/AiHisto.js +31 -0
  7. package/dist/constants/AiHisto.js.map +7 -0
  8. package/dist/src/aiHisto.d.ts +5 -0
  9. package/dist/src/aiHisto.js +15 -0
  10. package/dist/src/aiHisto.js.map +7 -0
  11. package/dist/src/bulk.cnv.js +83 -0
  12. package/dist/src/bulk.cnv.js.map +7 -0
  13. package/dist/src/bulk.del.js +119 -0
  14. package/dist/src/bulk.del.js.map +7 -0
  15. package/dist/src/bulk.itd.js +119 -0
  16. package/dist/src/bulk.itd.js.map +7 -0
  17. package/dist/src/bulk.js +183 -0
  18. package/dist/src/bulk.js.map +7 -0
  19. package/dist/src/bulk.snv.js +175 -0
  20. package/dist/src/bulk.snv.js.map +7 -0
  21. package/dist/src/bulk.sv.js +266 -0
  22. package/dist/src/bulk.sv.js.map +7 -0
  23. package/dist/src/bulk.svjson.js +151 -0
  24. package/dist/src/bulk.svjson.js.map +7 -0
  25. package/dist/src/bulk.trunc.js +122 -0
  26. package/dist/src/bulk.trunc.js.map +7 -0
  27. package/dist/src/clustering.js +71 -0
  28. package/dist/src/clustering.js.map +7 -0
  29. package/dist/src/common.js +1302 -0
  30. package/dist/src/common.js.map +7 -0
  31. package/dist/src/compute.percentile.js +10 -0
  32. package/dist/src/compute.percentile.js.map +7 -0
  33. package/dist/src/doc.d.ts +7 -0
  34. package/dist/src/doc.js +10 -0
  35. package/dist/src/doc.js.map +7 -0
  36. package/dist/src/fetch-helpers.js +177 -0
  37. package/dist/src/fetch-helpers.js.map +7 -0
  38. package/dist/src/fileSize.js +10 -0
  39. package/dist/src/fileSize.js.map +7 -0
  40. package/dist/src/filter.d.ts +62 -0
  41. package/dist/src/filter.js +194 -0
  42. package/dist/src/filter.js.map +7 -0
  43. package/dist/src/hash.js +20 -0
  44. package/dist/src/hash.js.map +7 -0
  45. package/dist/src/helpers.js +66 -0
  46. package/dist/src/helpers.js.map +7 -0
  47. package/dist/src/index.d.ts +26 -0
  48. package/dist/src/index.js +27 -0
  49. package/dist/src/index.js.map +7 -0
  50. package/dist/src/joinUrl.d.ts +1 -0
  51. package/dist/src/joinUrl.js +17 -0
  52. package/dist/src/joinUrl.js.map +7 -0
  53. package/dist/src/mds3tk.js +64 -0
  54. package/dist/src/mds3tk.js.map +7 -0
  55. package/dist/src/roundValue.js +57 -0
  56. package/dist/src/roundValue.js.map +7 -0
  57. package/dist/src/termdb.bins.js +272 -0
  58. package/dist/src/termdb.bins.js.map +7 -0
  59. package/dist/src/termdb.initbinconfig.js +79 -0
  60. package/dist/src/termdb.initbinconfig.js.map +7 -0
  61. package/dist/src/termdb.usecase.js +239 -0
  62. package/dist/src/termdb.usecase.js.map +7 -0
  63. package/dist/src/terms.d.ts +83 -0
  64. package/dist/src/terms.js +327 -0
  65. package/dist/src/terms.js.map +7 -0
  66. package/dist/src/time.d.ts +9 -0
  67. package/dist/src/time.js +23 -0
  68. package/dist/src/time.js.map +7 -0
  69. package/dist/src/tree.js +82 -0
  70. package/dist/src/tree.js.map +7 -0
  71. package/dist/src/urljson.d.ts +8 -0
  72. package/dist/src/urljson.js +31 -0
  73. package/dist/src/urljson.js.map +7 -0
  74. package/dist/src/vcf.ann.js +56 -0
  75. package/dist/src/vcf.ann.js.map +7 -0
  76. package/dist/src/vcf.csq.js +82 -0
  77. package/dist/src/vcf.csq.js.map +7 -0
  78. package/dist/src/vcf.info.js +40 -0
  79. package/dist/src/vcf.info.js.map +7 -0
  80. package/dist/src/vcf.js +439 -0
  81. package/dist/src/vcf.js.map +7 -0
  82. package/dist/src/vcf.type.js +17 -0
  83. package/dist/src/vcf.type.js.map +7 -0
  84. package/package.json +20 -11
  85. package/src/bulk.cnv.js +0 -86
  86. package/src/bulk.del.js +0 -124
  87. package/src/bulk.itd.js +0 -123
  88. package/src/bulk.js +0 -197
  89. package/src/bulk.snv.js +0 -271
  90. package/src/bulk.sv.js +0 -276
  91. package/src/bulk.svjson.js +0 -164
  92. package/src/bulk.trunc.js +0 -132
  93. package/src/clustering.js +0 -66
  94. package/src/common.js +0 -1608
  95. package/src/compute.percentile.js +0 -11
  96. package/src/doc.js +0 -6
  97. package/src/fetch-helpers.js +0 -323
  98. package/src/fileSize.js +0 -6
  99. package/src/filter.js +0 -221
  100. package/src/hash.js +0 -21
  101. package/src/helpers.js +0 -88
  102. package/src/index.js +0 -26
  103. package/src/joinUrl.js +0 -14
  104. package/src/mds3tk.js +0 -100
  105. package/src/roundValue.js +0 -94
  106. package/src/termdb.bins.js +0 -456
  107. package/src/termdb.initbinconfig.js +0 -130
  108. package/src/termdb.usecase.js +0 -317
  109. package/src/terms.js +0 -341
  110. package/src/time.js +0 -22
  111. package/src/tree.js +0 -138
  112. package/src/urljson.js +0 -41
  113. package/src/vcf.ann.js +0 -62
  114. package/src/vcf.csq.js +0 -153
  115. package/src/vcf.info.js +0 -50
  116. package/src/vcf.js +0 -654
  117. package/src/vcf.type.js +0 -24
package/src/common.js DELETED
@@ -1,1608 +0,0 @@
1
- /*
2
- shared between client and server
3
-
4
- exported functions
5
- - bplen()
6
- - mclasstester()
7
- - basecompliment()
8
-
9
-
10
- */
11
- import { rgb } from "d3-color"
12
- import * as d3scale from "d3-scale"
13
- import * as d3 from "d3"
14
- import { getWrappedTvslst } from "./filter.js"
15
-
16
- // moved from `#shared/terms` to here, so that this can be passed as
17
- // part of 'common' argument to exported dataset js function, at server runtime
18
- // 3/30/2026 - changed from literal object to a class with static properties
19
- // to make it easier for IDEs and tsc compiler to catch typos in consumer code
20
- export class TermTypeGroups {
21
- static DICTIONARY_VARIABLES = "Dictionary Variables"
22
- static DNA_METHYLATION = "DNA Methylation"
23
- static GENE_DEPENDENCY = "Gene Dependency"
24
- static GENE_EXPRESSION = "Gene Expression"
25
- static ISOFORM_EXPRESSION = "Isoform Expression"
26
- static GSEA = "GSEA"
27
- static METABOLITE_INTENSITY = "Metabolite Intensity"
28
- static PROTEOME_ABUNDANCE = "Proteome Abundance"
29
- static MUTATION_CNV_FUSION = "Mutation/CNV/Fusion"
30
- static MUTATION_SIGNATURE = "Mutation Signature"
31
- static PROTEIN_EXPRESSION = "Protein Expression"
32
- static SINGLECELL_CELLTYPE = "Single-cell Cell Type"
33
- static SINGLECELL_GENE_EXPRESSION = "Single-cell Gene Expression"
34
- static SNP = "SNP Genotype"
35
- static SNP_LIST = "SNP List"
36
- static SNP_LOCUS = "SNP Locus"
37
- static SPLICE_JUNCTION = "Splice Junction"
38
- static SSGSEA = "Geneset Expression"
39
- static TERM_COLLECTION = "Term Collection"
40
- static VARIANT_GENOTYPE = "Variant Genotype"
41
- }
42
- // freeze so that mutating any of the static properties above will throw at runtime
43
- Object.freeze(TermTypeGroups)
44
-
45
- export const defaultcolor = rgb("#8AB1D4").darker()
46
- export const default_text_color = rgb("#aaa").darker().darker()
47
-
48
- export const exoncolor = "#4F8053"
49
- export const plotColor = "#ce768e"
50
-
51
- // something that has something to do with coding gene reading frame
52
- export const IN_frame = true
53
- export const OUT_frame = false
54
-
55
- /********************************
56
- * on dt usage *
57
- *********************************
58
- - once a dt value is decided and used with actual dataset,
59
- the value must not be altered, since dataset file may hardcode such value and reassigning to a new integer will break!
60
- - never test dt value by range e.g. if(dt>10), it breaks! only test equality!
61
- - in code import variable from here and DO NOT use literal values, to make code tractable
62
- */
63
- export const dtsnvindel = 1
64
- export const dtfusionrna = 2
65
- export const dtgeneexpression = 3
66
- export const dtcnv = 4
67
- export const dtsv = 5
68
- export const dtitd = 6
69
- export const dtdel = 7
70
- export const dtnloss = 8
71
- export const dtcloss = 9
72
- export const dtloh = 10
73
- export const dtmetaboliteintensity = 11
74
- export const dtssgsea = 12
75
- export const dtdnamethylation = 13
76
- export const dtproteomeabundance = 14
77
- // add new dt value here. !!!DO NOT change value of existing dt!!!
78
-
79
- export const dt2label = {
80
- [dtsnvindel]: "SNV/indel",
81
- [dtfusionrna]: "Fusion RNA",
82
- [dtcnv]: "CNV",
83
- [dtsv]: "SV",
84
- [dtitd]: "ITD",
85
- [dtdel]: "Deletion",
86
- [dtnloss]: "N-loss",
87
- [dtcloss]: "C-loss",
88
- [dtloh]: "LOH",
89
- [dtgeneexpression]: "Gene Expression",
90
- [dtmetaboliteintensity]: "Metabolite Intensity",
91
- [dtproteomeabundance]: "Proteome Abundance",
92
- }
93
-
94
- // Maps dt types to UI labels and lesion types for GRIN2
95
- // All dt types use lesionTypes array for consistency
96
- export const dt2lesion = {
97
- [dtsnvindel]: {
98
- uilabel: "SNV/INDEL (Mutation)",
99
- lesionTypes: [
100
- { name: "Mutation", lesionType: "mutation", color: "#44AA44" },
101
- ],
102
- },
103
- [dtcnv]: {
104
- uilabel: "CNV (Copy Number Variation)",
105
- lesionTypes: [
106
- { name: "Loss", lesionType: "loss", color: "#4444FF" },
107
- { name: "Gain", lesionType: "gain", color: "#FF4444" },
108
- ],
109
- },
110
- [dtsv]: {
111
- uilabel: "SV (Structural Variation)",
112
- lesionTypes: [{ name: "SV", lesionType: "sv", color: "#9932CC" }],
113
- },
114
- [dtfusionrna]: {
115
- uilabel: "Fusion (RNA Fusion)",
116
- lesionTypes: [{ name: "Fusion", lesionType: "fusion", color: "#FFA500" }],
117
- },
118
- }
119
-
120
- // Maps GRIN2 option types to their corresponding dt values
121
- export const optionToDt = {
122
- snvindelOptions: dtsnvindel,
123
- cnvOptions: dtcnv,
124
- fusionOptions: dtfusionrna,
125
- svOptions: dtsv,
126
- }
127
-
128
- export const mclass = {
129
- M: {
130
- label: "MISSENSE",
131
- color: "#3987CC",
132
- dt: dtsnvindel,
133
- desc: "A sequence variant, that changes one or more bases, resulting in a different amino acid sequence but where the length is preserved",
134
- key: "M",
135
- },
136
- E: {
137
- label: "EXON",
138
- color: "#bcbd22",
139
- dt: dtsnvindel,
140
- desc: "A variant in the exon of a non-coding RNA.",
141
- key: "E",
142
- },
143
- F: {
144
- label: "FRAMESHIFT",
145
- color: "rgb(200, 61, 61)",
146
- dt: dtsnvindel,
147
- desc: "A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three",
148
- key: "F",
149
- },
150
- N: {
151
- label: "NONSENSE",
152
- color: "#ff7f0e",
153
- dt: dtsnvindel,
154
- desc: "A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript",
155
- key: "N",
156
- },
157
- S: {
158
- label: "SILENT",
159
- color: "#2ca02c",
160
- dt: dtsnvindel,
161
- desc: "A sequence variant where there is no resulting change to the encoded amino acid",
162
- key: "S",
163
- },
164
- D: {
165
- label: "PROTEINDEL",
166
- color: "rgb(100, 100, 100)",
167
- dt: dtsnvindel,
168
- desc: "An inframe non synonymous variant that deletes bases from the coding sequence",
169
- key: "D",
170
- },
171
- I: {
172
- label: "PROTEININS",
173
- color: "#8c564b",
174
- dt: dtsnvindel,
175
- desc: "An inframe non synonymous variant that inserts bases into in the coding sequence",
176
- key: "I",
177
- },
178
- ProteinAltering: {
179
- label: "PROTEINALTERING",
180
- color: "#5a0034",
181
- dt: dtsnvindel,
182
- desc: "An inframe complex change to the coding sequence",
183
- key: "ProteinAltering",
184
- },
185
- P: {
186
- label: "SPLICE_REGION",
187
- color: "#9467bd",
188
- dt: dtsnvindel,
189
- desc: "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron",
190
- key: "P",
191
- },
192
- L: {
193
- label: "SPLICE",
194
- color: "#6633FF",
195
- dt: dtsnvindel,
196
- desc: "A variant near an exon edge that may affect splicing functionality",
197
- key: "L",
198
- },
199
- Intron: {
200
- label: "INTRON",
201
- color: "#656565",
202
- dt: dtsnvindel,
203
- desc: "An intronic variant.",
204
- key: "Intron",
205
- },
206
-
207
- StopLost: {
208
- label: "Stop lost",
209
- color: "#ff7f0e",
210
- dt: dtsnvindel,
211
- desc: "A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript",
212
- key: "StopLost",
213
- },
214
- StartLost: {
215
- label: "Start lost",
216
- color: "#ff7f0e",
217
- dt: dtsnvindel,
218
- desc: "A codon variant that changes at least one base of the canonical start codon",
219
- key: "StartLost",
220
- },
221
-
222
- // quick fix!! for showing genes that are not tested in samples (e.g. gene panels) in the heatmap
223
- Blank: {
224
- label: "Not tested",
225
- color: "#fff",
226
- dt: dtsnvindel,
227
- desc: "This gene is not tested.",
228
- key: "Blank",
229
- },
230
-
231
- WT: {
232
- label: "Wildtype",
233
- color: "#D3D3D3",
234
- dt: dtsnvindel,
235
- desc: "Wildtype",
236
- key: "WT",
237
- },
238
- }
239
- export const mclassitd = "ITD"
240
- mclass[mclassitd] = {
241
- label: "ITD",
242
- color: "#ff70ff",
243
- dt: dtitd,
244
- desc: "In-frame internal tandem duplication.",
245
- key: mclassitd,
246
- }
247
-
248
- export const mclassdel = "DEL"
249
- mclass[mclassdel] = {
250
- label: "DELETION, intragenic",
251
- color: "#858585",
252
- dt: dtdel,
253
- desc: "Intragenic deletion.",
254
- key: mclassdel,
255
- }
256
-
257
- export const mclassnloss = "NLOSS"
258
- mclass[mclassnloss] = {
259
- label: "N-terminus loss",
260
- color: "#545454",
261
- dt: dtnloss,
262
- desc: "N-terminus loss due to translocation",
263
- key: mclassnloss,
264
- }
265
-
266
- export const mclasscloss = "CLOSS"
267
- mclass[mclasscloss] = {
268
- label: "C-terminus loss",
269
- color: "#545454",
270
- dt: dtcloss,
271
- desc: "C-terminus loss due to translocation",
272
- key: mclasscloss,
273
- }
274
-
275
- export const mclassutr3 = "Utr3"
276
- mclass[mclassutr3] = {
277
- label: "UTR_3",
278
- color: "#998199",
279
- dt: dtsnvindel,
280
- desc: "A variant in the 3' untranslated region.",
281
- key: mclassutr3,
282
- }
283
-
284
- export const mclassutr5 = "Utr5"
285
- mclass[mclassutr5] = {
286
- label: "UTR_5",
287
- color: "#819981",
288
- dt: dtsnvindel,
289
- desc: "A variant in the 5' untranslated region.",
290
- key: mclassutr5,
291
- }
292
-
293
- export const mclassnonstandard = "X"
294
- mclass[mclassnonstandard] = {
295
- label: "NONSTANDARD",
296
- color: "black",
297
- dt: dtsnvindel,
298
- desc: "A mutation class that either does not match our notation, or is unspecified.",
299
- key: mclassnonstandard,
300
- }
301
-
302
- export const mclassnoncoding = "noncoding"
303
- mclass[mclassnoncoding] = {
304
- label: "NONCODING",
305
- color: "black",
306
- dt: dtsnvindel,
307
- desc: "Noncoding mutation.",
308
- key: mclassnoncoding,
309
- }
310
-
311
- /*
312
- includes full list of consequences from
313
- https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html
314
- each entry is SO term with matching pp class
315
- entries that cannot be mapped to dtsnvindel classes are commented off
316
- order of entries is severity
317
-
318
- * single source of data! *
319
- from this array derives multiple types of lookup tables to perform mapping on both ways
320
- */
321
- const SOterms = [
322
- //transcript_ablation // not supported: 1) do not expect this in maf/vcf 2) should be represented as cnv deletion but not the legacy unused value "dtdel"; if needed can reenable
323
- ["splice_acceptor_variant", "L"],
324
- ["splice_donor_variant", "L"],
325
- ["stop_gained", "N"],
326
- ["frameshift_variant", "F"],
327
- ["stop_lost", "StopLost"],
328
- ["start_lost", "StartLost"],
329
- //transcript_amplification // not supported, should be represented by cnv instead
330
- ["feature_elongation", mclassnoncoding],
331
- ["feature_truncation", mclassnoncoding],
332
- ["inframe_insertion", "I"],
333
- ["inframe_deletion", "D"],
334
- ["missense_variant", "M"],
335
- ["protein_altering_variant", "ProteinAltering"],
336
- ["splice_donor_5th_base_variant", "P"],
337
- ["splice_region_variant", "P"],
338
- ["splice_donor_region_variant", "P"],
339
- ["splice_polypyrimidine_tract_variant", "P"],
340
- ["incomplete_terminal_codon_variant", "N"],
341
- ["start_retained_variant", "S"],
342
- ["stop_retained_variant", "S"],
343
- ["synonymous_variant", "S"],
344
- ["coding_sequence_variant", "E"],
345
- ["mature_miRNA_variant", "E"],
346
- ["5_prime_UTR_variant", mclassutr5],
347
- ["3_prime_UTR_variant", mclassutr3],
348
- ["non_coding_transcript_exon_variant", "E"],
349
- ["intron_variant", "Intron"],
350
- ["NMD_transcript_variant", "F"],
351
- ["non_coding_transcript_variant", "E"],
352
- ["coding_transcript_variant", "E"],
353
- ["upstream_gene_variant", mclassnoncoding],
354
- ["downstream_gene_variant", mclassnoncoding],
355
- ["TFBS_ablation", mclassnoncoding],
356
- ["TFBS_amplification", mclassnoncoding],
357
- ["TF_binding_site_variant", mclassnoncoding],
358
- ["regulatory_region_ablation", mclassnoncoding],
359
- ["regulatory_region_amplification", mclassnoncoding],
360
- ["regulatory_region_variant", mclassnoncoding],
361
- ["intergenic_variant", mclassnoncoding],
362
- ["sequence_variant", mclassnonstandard],
363
- ]
364
-
365
- // maps a pp class to an array of consequences
366
- // k: pp class, v: array of consequences. case sensitive
367
- export const class2SOterm = new Map()
368
- for (const [csq, cls] of SOterms) {
369
- if (!class2SOterm.has(cls)) class2SOterm.set(cls, [])
370
- class2SOterm.get(cls).push(csq)
371
- }
372
-
373
- // maps a consequence to a pp class. no severity info. use vepinfo() if severity is needed
374
- // k: consequence, v: pp class. case sensitive.
375
- export const SOterm2class = new Map()
376
- for (const [csq, cls] of SOterms) {
377
- SOterm2class.set(csq, cls)
378
- }
379
-
380
- // outdated function to match with adhoc nonstandard notations, only used in legacy code utils/src/bulk.snv.js
381
- // DO NOT USE to map vep consequences
382
- export function mclasstester(s) {
383
- switch (s.toLowerCase()) {
384
- case "missense_mutation":
385
- return "M"
386
- case "nonsense_mutation":
387
- return "N"
388
- case "splice_site":
389
- return "L"
390
- case "splice_region":
391
- return "P"
392
- case "rna":
393
- return mclassnoncoding
394
- case "frame_shift_del":
395
- return "F"
396
- case "frame_shift_ins":
397
- return "F"
398
- case "in_frame_del":
399
- return "D"
400
- case "in_frame_ins":
401
- return "I"
402
- case "protein_altering_variant":
403
- return "ProteinAltering"
404
- case "translation_start_site":
405
- return mclassnonstandard
406
- case "nonstop_mutation":
407
- return "N"
408
- case "3'utr":
409
- return mclassutr3
410
- case "3'flank":
411
- return mclassnoncoding
412
- case "5'utr":
413
- return mclassutr5
414
- case "5'flank":
415
- return mclassnoncoding
416
- case "silent":
417
- return "S"
418
- case "blank":
419
- return "Blank"
420
- default:
421
- return null
422
- }
423
- }
424
-
425
- export const mclassfusionrna = "Fuserna"
426
- mclass[mclassfusionrna] = {
427
- label: "Fusion transcript",
428
- color: "#545454",
429
- dt: dtfusionrna,
430
- desc:
431
- "Marks the break points leading to fusion transcripts.<br>" +
432
- "<span style=\"font-size:150%\">&#9680;</span> - 3' end of the break point is fused to the 5' end of another break point in a different gene.<br>" +
433
- "<span style=\"font-size:150%\">&#9681;</span> - 5' end of the break point is fused to the 3' end of another break point in a different gene.",
434
- key: mclassfusionrna,
435
- }
436
- export const mclasssv = "SV"
437
- mclass[mclasssv] = {
438
- label: "Structural variation",
439
- color: "#858585",
440
- dt: dtsv,
441
- desc:
442
- "<span style=\"font-size:150%\">&#9680;</span> - 3' end of the break point is fused to the 5' end of another break point in a different gene.<br>" +
443
- "<span style=\"font-size:150%\">&#9681;</span> - 5' end of the break point is fused to the 3' end of another break point in a different gene.",
444
- key: mclasssv,
445
- }
446
-
447
- // "CNV_amp" represents "CNV Gain" and is used in both 2-category and 5-category CNV data representation
448
- // "CNV_amplification" represents CNV amplification and is used in 5-category CNV
449
- // "CNV_amp" has to stay as-is since it may be hardcoded in lots of data beyond portal code.
450
- export const mclasscnvgain = "CNV_amp"
451
- mclass[mclasscnvgain] = {
452
- label: "Copy number gain", // TODO change to 'Gain'
453
- color: "#e9a3c9",
454
- dt: dtcnv,
455
- desc: "Copy number gain",
456
- key: mclasscnvgain,
457
- }
458
-
459
- export const mclasscnvloss = "CNV_loss"
460
- mclass[mclasscnvloss] = {
461
- label: "Copy number loss",
462
- color: "#a1d76a",
463
- dt: dtcnv,
464
- desc: "Copy number loss",
465
- key: mclasscnvloss,
466
- }
467
-
468
- // mclasscnvAmp is next level above mclasscnvgain and is used in 5-category CNV data
469
- export const mclasscnvAmp = "CNV_amplification"
470
- mclass[mclasscnvAmp] = {
471
- label: "Copy number amplification",
472
- color: "#ff0000",
473
- dt: dtcnv,
474
- desc: "Copy number amplification",
475
- key: mclasscnvAmp,
476
- }
477
-
478
- export const mclasscnvHomozygousDel = "CNV_homozygous_deletion"
479
- mclass[mclasscnvHomozygousDel] = {
480
- label: "Copy number homozygous deletion",
481
- color: "#0000ff",
482
- dt: dtcnv,
483
- desc: "Copy number homozygous deletion",
484
- key: mclasscnvHomozygousDel,
485
- }
486
-
487
- export const mclasscnvloh = "CNV_loh"
488
- mclass[mclasscnvloh] = {
489
- label: "LOH",
490
- color: "#12EDFC",
491
- dt: dtcnv,
492
- desc: "Loss of heterozygosity",
493
- key: mclasscnvloh,
494
- }
495
-
496
- // for VCF
497
- export const mclasssnv = "snv"
498
- mclass[mclasssnv] = {
499
- label: "SNV",
500
- color: "#92a2d4",
501
- dt: dtsnvindel,
502
- desc: "Single nucleotide variation",
503
- key: mclasssnv,
504
- }
505
-
506
- export const mclassmnv = "mnv"
507
- mclass[mclassmnv] = {
508
- label: "MNV",
509
- color: "#92a2d4",
510
- dt: dtsnvindel,
511
- desc: "Multiple nucleotide variation",
512
- key: mclassmnv,
513
- }
514
-
515
- export const mclassinsertion = "insertion"
516
- mclass[mclassinsertion] = {
517
- label: "Sequence insertion",
518
- color: "#bd8e91",
519
- dt: dtsnvindel,
520
- desc: "Sequence insertion",
521
- key: mclassinsertion,
522
- }
523
-
524
- export const mclassdeletion = "deletion"
525
- mclass[mclassdeletion] = {
526
- label: "Sequence deletion",
527
- color: "#b5a174",
528
- dt: dtsnvindel,
529
- desc: "Sequence deletion",
530
- key: mclassdeletion,
531
- }
532
- // TODO complex indel
533
-
534
- /* tricky
535
- when a mds3 tk uses numeric cnv, data points from tk.cnv.cnvLst[] has .class=dtcnv but no class!
536
- a "cnv" entry needs to be present in mclass legend, and thus this wrapper function over mclass{} to allow dtcnv as key
537
- the tricky case doesn't apply to other plots
538
- */
539
- export function mds3tkMclass(k) {
540
- if (k == dtcnv) {
541
- return {
542
- color: "#858585",
543
- label: "CNV",
544
- desc: "Copy number variation",
545
- }
546
- }
547
- return mclass[k]
548
- }
549
-
550
- export const dt2color = {
551
- [dtsnvindel]: mclass.M.color, // general color for snvindel irrespective of class (when class is not available)
552
- // add new dt as needed
553
- }
554
-
555
- // option to override mutation class attribute values
556
- export function applyOverrides(overrides = {}) {
557
- if (overrides.mclass) {
558
- for (const key in overrides.mclass) {
559
- // allow filling in mutation classes that are missing from mclass;
560
- // may be useful for things like 'Not tested', etc, that may not be in mclass by default
561
- // but are used by a customer with its own PP server instance
562
- if (!mclass[key]) mclass[key] = {}
563
- for (const subkey in overrides.mclass[key]) {
564
- mclass[key][subkey] = overrides.mclass[key][subkey]
565
- }
566
- }
567
- }
568
- }
569
-
570
- /* legacy function. input is comma-joined multiple consequences
571
- performs case insensitive match and returns severity rank
572
- TODO share data with SOterms
573
- */
574
- export const vepinfo = function (s) {
575
- const l = s.toLowerCase().split(",")
576
- let rank = 1
577
- if (l.indexOf("transcript_ablation") != -1) {
578
- // FIXME no class for whole gene deletion
579
- return [dtdel, mclassdel, rank]
580
- }
581
- rank++
582
- if (l.indexOf("splice_acceptor_variant") != -1) return [dtsnvindel, "L", rank]
583
- rank++
584
- if (l.indexOf("splice_donor_variant") != -1) return [dtsnvindel, "L", rank]
585
- rank++
586
- if (l.indexOf("stop_gained") != -1) return [dtsnvindel, "N", rank]
587
- rank++
588
- if (l.indexOf("frameshift_variant") != -1) return [dtsnvindel, "F", rank]
589
- rank++
590
- if (l.indexOf("stop_lost") != -1) return [dtsnvindel, "N", rank]
591
- rank++
592
- if (l.indexOf("start_lost") != -1) return [dtsnvindel, "N", rank]
593
- rank++
594
- if (l.indexOf("transcript_amplification") != -1) {
595
- // FIXME no class for whole gene amp
596
- return [dtsnvindel, mclassnonstandard, rank]
597
- }
598
- rank++
599
- if (
600
- l.indexOf("inframe_insertion") != -1 ||
601
- l.indexOf("conservative_inframe_insertion") != -1 ||
602
- l.indexOf("disruptive_inframe_insertion") != -1
603
- )
604
- return [dtsnvindel, "I", rank]
605
- rank++
606
- if (
607
- l.indexOf("inframe_deletion") != -1 ||
608
- l.indexOf("conservative_inframe_deletion") != -1 ||
609
- l.indexOf("disruptive_inframe_deletion") != -1
610
- )
611
- return [dtsnvindel, "D", rank]
612
- rank++
613
- if (l.indexOf("missense_variant") != -1) return [dtsnvindel, "M", rank]
614
- rank++
615
- if (l.indexOf("protein_altering_variant") != -1)
616
- return [dtsnvindel, "ProteinAltering", rank]
617
- rank++
618
- if (l.indexOf("splice_region_variant") != -1) return [dtsnvindel, "P", rank]
619
- rank++
620
- if (l.indexOf("incomplete_terminal_codon_variant") != -1)
621
- return [dtsnvindel, "N", rank]
622
- rank++
623
- if (l.indexOf("stop_retained_variant") != -1) return [dtsnvindel, "S", rank]
624
- rank++
625
- if (l.indexOf("synonymous_variant") != -1) return [dtsnvindel, "S", rank]
626
- rank++
627
- if (l.indexOf("coding_sequence_variant") != -1)
628
- return [dtsnvindel, mclassnonstandard, rank]
629
- rank++
630
- if (l.indexOf("mature_mirna_variant") != -1) return [dtsnvindel, "E", rank]
631
- rank++
632
- if (l.indexOf("5_prime_utr_variant") != -1)
633
- return [dtsnvindel, mclassutr5, rank]
634
- rank++
635
- if (l.indexOf("3_prime_utr_variant") != -1)
636
- return [dtsnvindel, mclassutr3, rank]
637
- rank++
638
- if (l.indexOf("non_coding_transcript_exon_variant") != -1)
639
- return [dtsnvindel, "E", rank]
640
- rank++
641
- if (l.indexOf("intron_variant") != -1) return [dtsnvindel, "Intron", rank]
642
- rank++
643
- if (l.indexOf("nmd_transcript_variant") != -1) return [dtsnvindel, "S", rank]
644
- rank++
645
- if (l.indexOf("non_coding_transcript_variant") != -1)
646
- return [dtsnvindel, "E", rank]
647
- rank++
648
- if (l.indexOf("upstream_gene_variant") != -1)
649
- return [dtsnvindel, mclassnoncoding, rank]
650
- rank++
651
- if (l.indexOf("downstream_gene_variant") != -1)
652
- return [dtsnvindel, mclassnoncoding, rank]
653
- rank++
654
- if (l.indexOf("tfbs_ablation") != -1)
655
- return [dtsnvindel, mclassnoncoding, rank]
656
- rank++
657
- if (l.indexOf("tfbs_amplification") != -1)
658
- return [dtsnvindel, mclassnoncoding, rank]
659
- rank++
660
- if (l.indexOf("tf_binding_site_variant") != -1)
661
- return [dtsnvindel, mclassnoncoding, rank]
662
- rank++
663
- if (l.indexOf("regulatory_region_ablation") != -1)
664
- return [dtsnvindel, mclassnoncoding, rank]
665
- rank++
666
- if (l.indexOf("regulatory_region_amplification") != -1)
667
- return [dtsnvindel, mclassnoncoding, rank]
668
- rank++
669
- if (l.indexOf("feature_elongation") != -1)
670
- return [dtsnvindel, mclassnoncoding, rank]
671
- rank++
672
- if (l.indexOf("regulatory_region_variant") != -1)
673
- return [dtsnvindel, mclassnoncoding, rank]
674
- rank++
675
- if (l.indexOf("feature_truncation") != -1)
676
- return [dtsnvindel, mclassnoncoding, rank]
677
- rank++
678
- if (l.indexOf("intergenic_variant") != -1)
679
- return [dtsnvindel, mclassnoncoding, rank]
680
- rank++
681
- return [dtsnvindel, mclassnonstandard, rank]
682
- }
683
-
684
- // m origin
685
- export const germlinelegend =
686
- '<circle cx="7" cy="12" r="7" fill="#b1b1b1"></circle><path d="M6.735557395310443e-16,-11A11,11 0 0,1 11,0L9,0A9,9 0 0,0 5.51091059616309e-16,-9Z" transform="translate(7,12)" fill="#858585" stroke="none"></path>'
687
-
688
- export const morigin = {}
689
-
690
- export const moriginsomatic = "S"
691
- morigin[moriginsomatic] = {
692
- label: "Somatic",
693
- desc: "A variant found only in a tumor sample. The proportion is indicated by lack of any arc.",
694
- legend: '<circle cx="7" cy="12" r="7" fill="#b1b1b1"></circle>',
695
- }
696
- export const morigingermline = "G"
697
- morigin[morigingermline] = {
698
- label: "Germline",
699
- desc: "A constitutional variant found in a normal sample. The proportion is indicated by the span of the solid arc within the whole circle.",
700
- legend: germlinelegend,
701
- }
702
-
703
- morigin.germline = morigin[morigingermline]
704
- morigin.somatic = morigin[moriginsomatic]
705
-
706
- export const moriginrelapse = "R"
707
- morigin[moriginrelapse] = {
708
- label: "Relapse",
709
- desc: "A somatic variant found only in a relapse sample. The proportion is indicated by the span of the hollow arc within the whole circle.",
710
- legend:
711
- '<circle cx="7" cy="12" r="7" fill="#b1b1b1"></circle><path d="M6.735557395310443e-16,-11A11,11 0 0,1 11,0L9,0A9,9 0 0,0 5.51091059616309e-16,-9Z" transform="translate(7,12)" fill="none" stroke="#858585"></path>',
712
- }
713
- export const morigingermlinepathogenic = "GP"
714
- morigin[morigingermlinepathogenic] = {
715
- label: "Germline pathogenic",
716
- desc: "A constitutional variant with pathogenic allele.",
717
- legend: germlinelegend,
718
- }
719
- export const morigingermlinenonpathogenic = "GNP"
720
- morigin[morigingermlinenonpathogenic] = {
721
- label: "Germline non-pathogenic",
722
- desc: "A constitutional variant with non-pathogenic allele.",
723
- legend: germlinelegend,
724
- hidden: true,
725
- }
726
-
727
- export const tkt = {
728
- usegm: "usegm",
729
- ds: "dataset",
730
- bigwig: "bigwig",
731
- bigwigstranded: "bigwigstranded",
732
- junction: "junction",
733
- mdsjunction: "mdsjunction",
734
- mdssvcnv: "mdssvcnv", // replaced by mds3
735
- mdsexpressionrank: "mdsexpressionrank",
736
- mdsvcf: "mdsvcf", // for snv/indels, currently vcf, may include MAF
737
- //mdsgeneral:'mdsgeneral', // replaces mdssvcnv ****** not ready yet
738
- bedj: "bedj",
739
- pgv: "profilegenevalue",
740
- bampile: "bampile",
741
- hicstraw: "hicstraw",
742
- expressionrank: "expressionrank",
743
- aicheck: "aicheck",
744
- ase: "ase",
745
- mds3: "mds3", //
746
- bedgraphdot: "bedgraphdot",
747
- bam: "bam",
748
- ld: "ld",
749
- }
750
-
751
- export function validtkt(what) {
752
- for (const k in tkt) {
753
- if (what == tkt[k]) {
754
- return true
755
- }
756
- }
757
- return false
758
- }
759
-
760
- /*
761
- member track types from mdsvcf
762
- to get rid of hardcoded strings
763
- in future may include MAF format files
764
- */
765
- export const mdsvcftype = {
766
- vcf: "vcf",
767
- }
768
-
769
- /*
770
- for custom mdssvcnv track
771
- or general track
772
- to avoid using hard-coded string
773
- */
774
- export const custommdstktype = {
775
- vcf: "vcf",
776
- svcnvitd: "svcnvitd",
777
- geneexpression: "geneexpression",
778
- }
779
-
780
- // codons that are not here are stop codon!!
781
- export const codon = {
782
- GCT: "A",
783
- GCC: "A",
784
- GCA: "A",
785
- GCG: "A",
786
- CGT: "R",
787
- CGC: "R",
788
- CGA: "R",
789
- CGG: "R",
790
- AGA: "R",
791
- AGG: "R",
792
- AAT: "N",
793
- AAC: "N",
794
- GAT: "D",
795
- GAC: "D",
796
- TGT: "C",
797
- TGC: "C",
798
- CAA: "Q",
799
- CAG: "Q",
800
- GAA: "E",
801
- GAG: "E",
802
- GGT: "G",
803
- GGC: "G",
804
- GGA: "G",
805
- GGG: "G",
806
- CAT: "H",
807
- CAC: "H",
808
- ATT: "I",
809
- ATC: "I",
810
- ATA: "I",
811
- TTA: "L",
812
- TTG: "L",
813
- CTT: "L",
814
- CTC: "L",
815
- CTA: "L",
816
- CTG: "L",
817
- AAA: "K",
818
- AAG: "K",
819
- ATG: "M",
820
- TTT: "F",
821
- TTC: "F",
822
- CCT: "P",
823
- CCC: "P",
824
- CCA: "P",
825
- CCG: "P",
826
- TCT: "S",
827
- TCC: "S",
828
- TCA: "S",
829
- TCG: "S",
830
- AGT: "S",
831
- AGC: "S",
832
- ACT: "T",
833
- ACC: "T",
834
- ACA: "T",
835
- ACG: "T",
836
- TGG: "W",
837
- TAT: "Y",
838
- TAC: "Y",
839
- GTT: "V",
840
- GTC: "V",
841
- GTA: "V",
842
- GTG: "V",
843
- }
844
-
845
- export const codon_stop = "*"
846
-
847
- export function nt2aa(gm) {
848
- // must convert genome seq to upper case!!!
849
- if (!gm.genomicseq) return undefined
850
- const enlst = []
851
- if (gm.coding) {
852
- for (const [i, e] of gm.coding.entries()) {
853
- const s = gm.genomicseq.substr(e[0] - gm.start, e[1] - e[0])
854
- if (gm.strand == "-") {
855
- enlst.push(reversecompliment(s))
856
- } else {
857
- enlst.push(s)
858
- }
859
- }
860
- }
861
- const nt = enlst.join("")
862
- const pep = []
863
-
864
- /*
865
- if startCodonFrame is set, will not begin translation from first nt, but will skip 1 or 2 nt at the beginning
866
- in case of IGKC, frame=1 means it will borrow 1 nt from the previous IGKJ exons
867
- so the first two nucleotides from the current exon will have to be skipped when translating IGKC alone
868
- */
869
- const startntidx = gm.startCodonFrame ? 3 - gm.startCodonFrame : 0
870
- for (let i = startntidx; i < nt.length; i += 3) {
871
- const a = codon[nt.substr(i, 3)]
872
- pep.push(a || codon_stop)
873
- }
874
- gm.cdseq = nt
875
- return pep.join("")
876
- }
877
-
878
- export function bplen(len, isfile) {
879
- // if "isfile" is true, to measure file size instead of basepair len
880
- if (len >= 1000000000) return (len / 1000000000).toFixed(1) + " Gb"
881
- if (len >= 10000000) return Math.ceil(len / 1000000) + " Mb"
882
- if (len >= 1000000) return (len / 1000000).toFixed(1) + " Mb"
883
- if (len >= 10000) return Math.ceil(len / 1000) + " Kb"
884
- if (len >= 1000) return (len / 1000).toFixed(1) + " Kb"
885
- return len + (isfile ? "bytes" : " bp")
886
- }
887
-
888
- export const basecolor = {
889
- A: "#ca0020",
890
- T: "#f4a582",
891
- C: "#92c5de",
892
- G: "#0571b0",
893
- }
894
-
895
- export function basecompliment(nt) {
896
- switch (nt) {
897
- case "A":
898
- return "T"
899
- case "T":
900
- return "A"
901
- case "C":
902
- return "G"
903
- case "G":
904
- return "C"
905
- case "a":
906
- return "t"
907
- case "t":
908
- return "a"
909
- case "c":
910
- return "g"
911
- case "g":
912
- return "c"
913
- default:
914
- return nt
915
- }
916
- }
917
-
918
- export function reversecompliment(s) {
919
- const tmp = []
920
- for (let i = s.length - 1; i >= 0; i--) {
921
- tmp.push(basecompliment(s[i]))
922
- }
923
- return tmp.join("")
924
- }
925
-
926
- export function spliceeventchangegmexon(gm, evt) {
927
- /*
928
- alter gm.coding[], by exon-skip/alt events
929
- for frame checking
930
- gm must have coding
931
- */
932
- const gm2 = {
933
- chr: gm.chr,
934
- start: gm.start,
935
- stop: gm.stop,
936
- strand: gm.strand,
937
- coding: [],
938
- }
939
- if (evt.isskipexon || evt.isaltexon) {
940
- for (let i = 0; i < gm.exon.length; i++) {
941
- const codingstart = Math.max(gm.codingstart, gm.exon[i][0])
942
- const codingstop = Math.min(gm.codingstop, gm.exon[i][1])
943
- if (codingstart > codingstop) {
944
- // not coding exon
945
- continue
946
- }
947
- if (evt.skippedexon.indexOf(i) == -1) {
948
- // not skipped
949
- gm2.coding.push([codingstart, codingstop])
950
- } else {
951
- // skipped
952
- }
953
- }
954
- } else if (evt.a5ss || evt.a3ss) {
955
- // still equal number of exons
956
- // adjust the affected exon first, then figure out coding[]
957
- const exons = gm.exon.map((e) => [e[0], e[1]])
958
- const forward = gm.strand == "+"
959
- if (evt.a5ss) {
960
- if (forward) {
961
- exons[evt.exon5idx][1] = evt.junctionB.start
962
- } else {
963
- exons[evt.exon5idx + 1][0] = evt.junctionB.stop
964
- }
965
- } else {
966
- if (forward) {
967
- exons[evt.exon5idx + 1][0] = evt.junctionB.stop
968
- } else {
969
- exons[evt.exon5idx][1] = evt.junctionB.start
970
- }
971
- }
972
- // from new exons, figure out coding exons
973
- for (const e of exons) {
974
- const codingstart = Math.max(gm.codingstart, e[0])
975
- const codingstop = Math.min(gm.codingstop, e[1])
976
- if (codingstart > codingstop) {
977
- // not coding exon
978
- continue
979
- }
980
- gm2.coding.push([codingstart, codingstop])
981
- }
982
- }
983
- return gm2
984
- }
985
-
986
- export function fasta2gmframecheck(gm, str) {
987
- /*
988
- gm{}
989
- .chr
990
- .start
991
- .stop
992
- start/stop is transcript position
993
- .strand
994
- .coding[]
995
- str
996
- samtools faidx output
997
- */
998
- const lines = str.split("\n")
999
- // remove fasta header
1000
- lines.shift()
1001
- gm.genomicseq = lines.join("").toUpperCase()
1002
-
1003
- const aaseq = nt2aa(gm)
1004
-
1005
- let thisframe = OUT_frame
1006
- const stopcodonidx = aaseq.indexOf(codon_stop)
1007
- if (stopcodonidx == aaseq.length - 1) {
1008
- // the first appearance of stop codon is at the last of translation
1009
- thisframe = IN_frame
1010
- }
1011
- return thisframe
1012
- }
1013
-
1014
- export function validate_vcfinfofilter(obj) {
1015
- /*
1016
- validate vcfinfofilter as from embedding api or dataset
1017
- */
1018
-
1019
- if (!obj.lst) return ".lst missing"
1020
-
1021
- if (!Array.isArray(obj.lst)) return "input is not an array"
1022
-
1023
- for (const set of obj.lst) {
1024
- if (!set.name) return "name missing from a set of .vcfinfofilter.lst"
1025
-
1026
- if (set.autocategory || set.categories) {
1027
- // categorical info, auto or defined
1028
-
1029
- if (!set.autocategory) {
1030
- for (const k in set.categories) {
1031
- const v = set.categories[k]
1032
- if (!set.autocolor && !v.color)
1033
- return (
1034
- ".color missing for class " +
1035
- k +
1036
- " from .categories of set " +
1037
- set.name
1038
- )
1039
- if (!v.label) {
1040
- v.label = k
1041
- }
1042
- }
1043
- }
1044
-
1045
- if (set.categoryhidden) {
1046
- for (const k in set.categoryhidden) {
1047
- if (!set.categories[k])
1048
- return (
1049
- "unknown hidden-by-default category " +
1050
- k +
1051
- " from set " +
1052
- set.name
1053
- )
1054
- }
1055
- } else {
1056
- set.categoryhidden = {}
1057
- }
1058
- } else if (set.numericfilter) {
1059
- // otherwise, numerical value, the style of population frequency filter
1060
- const lst = []
1061
- for (const v of set.numericfilter) {
1062
- if (typeof v == "number") {
1063
- /*
1064
- just a number, defaults to 'lower-than'
1065
- */
1066
- lst.push({ side: "<", value: v })
1067
- } else {
1068
- lst.push({
1069
- side: v.side || "<",
1070
- value: v.value,
1071
- })
1072
- }
1073
- }
1074
- set.numericfilter = lst
1075
-
1076
- //return 'no .categories or .numericfilter from set '+set.name
1077
- }
1078
-
1079
- if (set.altalleleinfo) {
1080
- if (!set.altalleleinfo.key) {
1081
- return ".key missing from .altalleleinfo from set " + set.name
1082
- }
1083
- } else if (set.locusinfo) {
1084
- if (!set.locusinfo.key) {
1085
- return ".key missing from .locusinfo from set " + set.name
1086
- }
1087
- } else {
1088
- return (
1089
- "neither .altalleleinfo or .locusinfo is available from set " + set.name
1090
- )
1091
- }
1092
- }
1093
- }
1094
-
1095
- export function contigNameNoChr(genome, chrlst) {
1096
- /*
1097
- FIXME hard-coded for human genome styled chromosome names
1098
- */
1099
- for (const n in genome.majorchr) {
1100
- if (chrlst.indexOf(n.replace("chr", "")) != -1) {
1101
- return true
1102
- }
1103
- }
1104
- if (genome.minorchr) {
1105
- for (const n in genome.minorchr) {
1106
- if (chrlst.indexOf(n.replace("chr", "")) != -1) {
1107
- return true
1108
- }
1109
- }
1110
- }
1111
- return false
1112
- }
1113
- export function contigNameNoChr2(genome, chrlst) {
1114
- // returns number of matching chr names that either includes "chr" or not
1115
- // for detecting if chrlst entirely mismatch with what's in the genome build
1116
- // TODO replace contigNameNoChr
1117
- let nochrcount = 0,
1118
- haschrcount = 0
1119
- for (const n in genome.majorchr) {
1120
- if (chrlst.includes(n)) {
1121
- haschrcount++
1122
- } else if (chrlst.includes(n.replace("chr", ""))) {
1123
- nochrcount++
1124
- }
1125
- }
1126
- if (genome.minorchr) {
1127
- for (const n in genome.minorchr) {
1128
- if (chrlst.includes(n)) {
1129
- haschrcount++
1130
- } else if (chrlst.includes(n.replace("chr", ""))) {
1131
- nochrcount++
1132
- }
1133
- }
1134
- }
1135
- return [nochrcount, haschrcount]
1136
- }
1137
-
1138
- export function getMax_byiqr(lst, novaluemax) {
1139
- /*
1140
- lst: array of numbers
1141
- novaluemax: when lst is empty, return this value
1142
- cutoff value based on IQR to exclude outlier values
1143
- */
1144
- if (lst.length == 0) return novaluemax
1145
- lst.sort((i, j) => i - j)
1146
- const max = lst[lst.length - 1]
1147
- if (lst.length <= 5) return max
1148
- const q1 = lst[Math.floor(lst.length / 4)]
1149
- const q2 = lst[Math.floor((lst.length * 3) / 4)]
1150
- return Math.min(q2 + (q2 - q1) * 1.5, max)
1151
- }
1152
-
1153
- export function alleleInGenotypeStr(genotype, allele) {
1154
- if (!genotype) return false
1155
- if (genotype.indexOf("/") != -1) {
1156
- return genotype.split("/").indexOf(allele) != -1
1157
- }
1158
- return genotype.split("|").indexOf(allele) != -1
1159
- }
1160
-
1161
- export const gmmode = {
1162
- genomic: "genomic",
1163
- splicingrna: "splicing RNA", // if just 1 exon, use "RNA" as label
1164
- exononly: "exon only",
1165
- protein: "protein",
1166
- gmsum: "aggregated exons",
1167
- }
1168
-
1169
- /*
1170
- input:
1171
-
1172
- m={}
1173
- m.csq=[]
1174
- element: {
1175
- Allele: str,
1176
- Consequence: str,
1177
- CANONICAL: str, // true if _isoform is canonical
1178
- ...
1179
- _isoform: str,
1180
- _class: str,
1181
- _csqrank: int
1182
- }
1183
- m.ann=[]
1184
- annovar output. may be derelict
1185
- block={}
1186
- block.usegm={ isoform }
1187
- can be a mock object when running this function in node!
1188
-
1189
- does:
1190
- find an annotation from m.csq[] that's fitting the circumstance
1191
- - current gm isoform displayed in block gene mode
1192
- - any canonical isoform from m.csq[] (can be missing if vep is not instructed to do it)
1193
- - one with highest _csqrank
1194
- then, copy its class/mname to m{}
1195
- has many fall-back and always try to assign class/mname
1196
-
1197
- no return
1198
- */
1199
- export function vcfcopymclass(m, block) {
1200
- if (m.csq) {
1201
- let useone // point to the element of m.csq[], from this class/mname is copied to m{}
1202
-
1203
- if (block.usegm) {
1204
- // block is in gm mode, find a csq matching with the genemodel isoform
1205
- useone = m.csq.find((i) => i._isoform == block.usegm.isoform)
1206
- }
1207
-
1208
- if (!useone) {
1209
- // no match to usegm isoform; can be due to in genomic mode and zoomed out, where this variant is from a neighboring gene near block.usegm
1210
- // find one using canonical isoform
1211
- useone = m.csq.find((i) => i.CANONICAL)
1212
-
1213
- if (!useone) {
1214
- // none of the elements in m.csq[] is using a canonical isoform, as that's a vep optional output
1215
- // last method: choose *colorful* annotation based on if is canonical, _csqrank
1216
- useone = m.csq[0]
1217
- for (const q of m.csq) {
1218
- if (q._csqrank < useone._csqrank) {
1219
- useone = q
1220
- }
1221
- }
1222
- }
1223
- }
1224
-
1225
- if (useone) {
1226
- m.gene = useone._gene
1227
- m.isoform = useone._isoform
1228
- m.class = useone._class
1229
- m.dt = useone._dt
1230
- m.mname = useone._mname
1231
-
1232
- if (m.class == mclassnoncoding) {
1233
- // noncoding converted from csq is not a meaningful, drab color, has no mname label, delete so later will be converted to non-protein class
1234
- delete m.class
1235
- }
1236
- }
1237
- } else if (m.ann) {
1238
- // there could be many applicable annotations, the first one not always desirable
1239
- // choose *colorful* annotation based on _csqrank
1240
- let useone = null
1241
- if (block.usegm) {
1242
- for (const q of m.ann) {
1243
- if (q._isoform != block.usegm.isoform) continue
1244
- if (useone) {
1245
- if (q._csqrank < useone._csqrank) {
1246
- useone = q
1247
- }
1248
- } else {
1249
- useone = q
1250
- }
1251
- }
1252
- if (!useone && block.gmmode == gmmode.genomic) {
1253
- // no match to this gene, but in genomic mode, maybe from other genes?
1254
- useone = m.ann[0]
1255
- }
1256
- } else {
1257
- useone = m.ann[0]
1258
- for (const q of m.ann) {
1259
- if (q._csqrank < useone._csqrank) {
1260
- useone = q
1261
- }
1262
- }
1263
- }
1264
- if (useone) {
1265
- m.gene = useone._gene
1266
- m.isoform = useone._isoform
1267
- m.class = useone._class
1268
- m.dt = useone._dt
1269
- m.mname = useone._mname
1270
-
1271
- if (m.class == mclassnoncoding) {
1272
- delete m.class
1273
- }
1274
- }
1275
- }
1276
-
1277
- if (m.class == undefined) {
1278
- // infer class from m.type, which was assigned by vcf.js
1279
- if (mclass[m.type]) {
1280
- m.class = m.type
1281
- m.dt = mclass[m.type].dt
1282
- m.mname = m.id && m.id != "." ? m.id : m.ref + ">" + m.alt
1283
- if (m.mname.length > 15) {
1284
- // avoid long indel
1285
- m.mname = m.type
1286
- }
1287
- } else {
1288
- m.class = mclassnonstandard
1289
- m.dt = dtsnvindel
1290
- m.mname = m.type
1291
- }
1292
- }
1293
-
1294
- delete m.type
1295
- }
1296
-
1297
- /*
1298
- used in:
1299
- mdssvcnv track, mutation attributes, items that are not annotated by an attribute for showing in legend, and server-side filtering
1300
- */
1301
- export const not_annotated = "Unannotated"
1302
-
1303
- // kernal density estimator as from https://www.d3-graph-gallery.com/graph/density_basic.html
1304
-
1305
- export function kernelDensityEstimator(kernel, X) {
1306
- return function (V) {
1307
- return X.map((x) => {
1308
- return [
1309
- x,
1310
- V.map((v) => kernel(x - v)).reduce((i, j) => i + j, 0) / V.length,
1311
- ]
1312
- })
1313
- }
1314
- }
1315
-
1316
- export function kernelEpanechnikov(k) {
1317
- return function (v) {
1318
- return Math.abs((v /= k)) <= 1 ? (0.75 * (1 - v * v)) / k : 0
1319
- }
1320
- }
1321
-
1322
- /////////////////////// color sets /////////////////////////
1323
-
1324
- export const schemeCategory20 = [
1325
- "#1f77b4",
1326
- "#aec7e8",
1327
- "#ff7f0e",
1328
- "#ffbb78",
1329
- "#2ca02c",
1330
- "#98df8a",
1331
- "#d62728",
1332
- "#ff9896",
1333
- "#9467bd",
1334
- "#c5b0d5",
1335
- "#8c564b",
1336
- "#c49c94",
1337
- "#e377c2",
1338
- "#f7b6d2",
1339
- "#7f7f7f",
1340
- "#c7c7c7",
1341
- "#bcbd22",
1342
- "#dbdb8d",
1343
- "#17becf",
1344
- "#9edae5",
1345
- ]
1346
- export const schemeCategory2 = ["#e75480", "blue"]
1347
-
1348
- export function getColorScheme(number) {
1349
- if (number > 20) {
1350
- const scheme = []
1351
- for (let i = 0; i < number; i++)
1352
- scheme.push(d3.interpolateRainbow(i / number))
1353
- return scheme
1354
- }
1355
- if (number > 12) return schemeCategory20
1356
- else if (number > 8) return d3.schemePaired
1357
- else if (number > 2) return d3.schemeDark2
1358
- else return schemeCategory2
1359
- }
1360
- export function getColors(number) {
1361
- const scheme = getColorScheme(number)
1362
- return d3scale.scaleOrdinal(scheme)
1363
- }
1364
-
1365
- // for now not using getColorScheme() for protein domains, because this color list have been in use since 2015...
1366
- const proteinDomainColors = [
1367
- "#8dd3c7",
1368
- "#bebada",
1369
- "#fb8072",
1370
- "#80b1d3",
1371
- "#E8E89E",
1372
- "#a6d854",
1373
- "#fdb462",
1374
- "#ffd92f",
1375
- "#e5c494",
1376
- "#b3b3b3",
1377
- ]
1378
- export function proteinDomainColorScale() {
1379
- return d3scale.scaleOrdinal().range(proteinDomainColors)
1380
- }
1381
-
1382
- /////////////////////// end of color sets /////////////////////////
1383
-
1384
- export const truncatingMutations = ["F", "N", "L", "P"]
1385
- export const proteinChangingMutations = [
1386
- "F",
1387
- "N",
1388
- "L",
1389
- "P",
1390
- "D",
1391
- "I",
1392
- "ProteinAltering",
1393
- "M",
1394
- ]
1395
- export const synonymousMutations = [
1396
- "S",
1397
- "Intron",
1398
- "Utr3",
1399
- "Utr5",
1400
- "noncoding",
1401
- "E",
1402
- ]
1403
- export const mutationClasses = Object.values(mclass)
1404
- .filter((m) => m.dt == dtsnvindel)
1405
- .map((m) => m.key)
1406
- export const CNVClasses = Object.values(mclass)
1407
- .filter((m) => m.dt == dtcnv)
1408
- .map((m) => m.key)
1409
-
1410
- // dt terms used for filtering variants for geneVariant term
1411
- const dtTerms_temp = [
1412
- {
1413
- id: "snvindel",
1414
- query: "snvindel",
1415
- name: dt2label[dtsnvindel],
1416
- parent_id: null,
1417
- isleaf: true,
1418
- type: "dtsnvindel",
1419
- dt: dtsnvindel,
1420
- values: {},
1421
- },
1422
- {
1423
- id: "cnv",
1424
- query: "cnv",
1425
- name: dt2label[dtcnv],
1426
- parent_id: null,
1427
- isleaf: true,
1428
- type: "dtcnv",
1429
- dt: dtcnv,
1430
- values: {},
1431
- },
1432
- {
1433
- id: "fusion",
1434
- query: "svfusion",
1435
- name: dt2label[dtfusionrna],
1436
- parent_id: null,
1437
- isleaf: true,
1438
- type: "dtfusion",
1439
- dt: dtfusionrna,
1440
- values: {},
1441
- },
1442
- {
1443
- id: "sv",
1444
- query: "svfusion",
1445
- name: dt2label[dtsv],
1446
- parent_id: null,
1447
- isleaf: true,
1448
- type: "dtsv",
1449
- dt: dtsv,
1450
- values: {},
1451
- },
1452
- ]
1453
- // add origin annotations to dt terms
1454
- const dtTerms_temp2 = []
1455
- for (const dtTerm of dtTerms_temp) {
1456
- dtTerm.name_noOrigin = dtTerm.name // for labeling groups in groupsetting
1457
- dtTerms_temp2.push(dtTerm) // no origin
1458
- for (const origin of ["somatic", "germline"]) {
1459
- // add origins
1460
- const addOrigin = {
1461
- id: `${dtTerm.id}_${origin}`,
1462
- name: `${dtTerm.name} (${origin})`,
1463
- origin,
1464
- }
1465
- dtTerms_temp2.push(Object.assign({}, dtTerm, addOrigin))
1466
- }
1467
- }
1468
- export const dtTerms = dtTerms_temp2
1469
-
1470
- export const colorScaleMap = {
1471
- blueWhiteRed: { domain: [0, 0.5, 1], range: ["blue", "white", "red"] },
1472
- greenWhiteRed: { domain: [0, 0.5, 1], range: ["green", "white", "red"] },
1473
- blueYellowRed: {
1474
- domain: [0, 0.17, 0.33, 0.5, 0.67, 0.83, 1],
1475
- range: [
1476
- "#313695",
1477
- "#649AC7",
1478
- "#BCE1ED",
1479
- "#FFFFBF",
1480
- "#FDBE70",
1481
- "#EA5839",
1482
- "#A50026",
1483
- ],
1484
- },
1485
- greenBlackRed: {
1486
- domain: [0, 0.17, 0.33, 0.5, 0.67, 0.83, 1],
1487
- range: [
1488
- "#00FF00",
1489
- "#14E10C",
1490
- "#1AAF10",
1491
- "#000000",
1492
- "#B01205",
1493
- "#E20E03",
1494
- "#FF0000",
1495
- ],
1496
- },
1497
- blueBlackYellow: {
1498
- domain: [0, 0.17, 0.33, 0.5, 0.67, 0.83, 1],
1499
- range: [
1500
- "#0000FF",
1501
- "#0000CC",
1502
- "#000099",
1503
- "#202020",
1504
- "#999900",
1505
- "#CCCC00",
1506
- "#FFFF00",
1507
- ],
1508
- },
1509
- // when hierCluster z-score transformation is not performed, should use two-color scale
1510
- whiteRed: { domain: [0, 1], range: ["white", "red"] },
1511
- }
1512
-
1513
- export function invalidcoord(thisgenome, chrom, start, stop) {
1514
- if (!thisgenome) return "no genome"
1515
- if (!chrom) return "no chr name"
1516
- const chr = thisgenome.chrlookup[chrom.toUpperCase()]
1517
- if (!chr) return "Invalid chromosome name: " + chr
1518
- if (!Number.isInteger(start)) return "Non-numerical position: " + start
1519
- if (start < 0 || start >= chr.len) return "Position out of range: " + start
1520
- if (!Number.isInteger(stop)) return "Non-numerical position: " + stop
1521
- if (stop < 0 || stop > chr.len) return "Position out of range: " + stop
1522
- if (start > stop) return "Start position is greater than stop"
1523
- return false
1524
- }
1525
-
1526
- export function string2pos(s, genome, donotextend) {
1527
- s = s.replace(/,/g, "")
1528
- const chr = genome.chrlookup[s.toUpperCase()]
1529
- if (chr) {
1530
- // chr name only, to middle
1531
- return {
1532
- chr: chr.name,
1533
- chrlen: chr.len,
1534
- start: Math.max(0, Math.ceil(chr.len / 2) - 10000),
1535
- stop: Math.min(chr.len, Math.ceil(chr.len / 2) + 10000),
1536
- }
1537
- }
1538
- {
1539
- // special handling for snv4
1540
- const tmp = s.split(".")
1541
- if (tmp.length >= 2) {
1542
- const chr = genome.chrlookup[tmp[0].toUpperCase()]
1543
- const pos = Number.parseInt(tmp[1])
1544
- const e = invalidcoord(genome, tmp[0], pos, pos + 1)
1545
- if (!e) {
1546
- // valid snv4
1547
- const bpspan = 400
1548
- return {
1549
- chr: chr.name,
1550
- chrlen: chr.len,
1551
- start: Math.max(0, pos - Math.ceil(bpspan / 2)),
1552
- stop: Math.min(chr.len, pos + Math.ceil(bpspan / 2)),
1553
- actualposition: { position: pos, len: 1 },
1554
- }
1555
- }
1556
- }
1557
- }
1558
- const tmp = s.split(/[-:\s]+/)
1559
- if (tmp.length == 2) {
1560
- // must be chr - pos
1561
- const pos = Number.parseInt(tmp[1])
1562
- const e = invalidcoord(genome, tmp[0], pos, pos + 1)
1563
- if (e) {
1564
- return null
1565
- }
1566
- const chr = genome.chrlookup[tmp[0].toUpperCase()]
1567
- const bpspan = 400
1568
- return {
1569
- chr: chr.name,
1570
- chrlen: chr.len,
1571
- start: Math.max(0, pos - Math.ceil(bpspan / 2)),
1572
- stop: Math.min(chr.len, pos + Math.ceil(bpspan / 2)),
1573
- actualposition: { position: pos, len: 1 },
1574
- }
1575
- }
1576
- if (tmp.length == 3) {
1577
- // must be chr - start - stop
1578
- let start = Number.parseInt(tmp[1]),
1579
- stop = Number.parseInt(tmp[2])
1580
- const e = invalidcoord(genome, tmp[0], start, stop)
1581
- if (e) {
1582
- return null
1583
- }
1584
- const actualposition = { position: start, len: stop - start }
1585
- const chr = genome.chrlookup[tmp[0].toUpperCase()]
1586
-
1587
- if (!donotextend) {
1588
- const minspan = 400
1589
- if (stop - start < minspan) {
1590
- let center = Math.ceil((start + stop) / 2)
1591
- if (center + minspan / 2 >= chr.len) {
1592
- center = chr.len - Math.ceil(minspan / 2)
1593
- }
1594
- start = Math.max(0, center - Math.ceil(minspan / 2))
1595
- stop = start + minspan
1596
- }
1597
- }
1598
-
1599
- return {
1600
- chr: chr.name,
1601
- chrlen: chr.len,
1602
- start,
1603
- stop,
1604
- actualposition,
1605
- }
1606
- }
1607
- return null
1608
- }