@datagrok/bio 1.5.7 → 1.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/css/helm.css +3 -0
- package/detectors.js +9 -10
- package/dist/package-test.js +1095 -416
- package/dist/package.js +898 -250
- package/files/samples/sample_FASTA.csv +66 -66
- package/helm/JSDraw/Pistoia.HELM-uncompressed.js +9694 -0
- package/helm/JSDraw/Pistoia.HELM.js +27 -0
- package/helm/JSDraw/ReadMe.txt +8 -0
- package/helm/JSDraw/Scilligence.JSDraw2.Lite-uncompressed.js +31126 -0
- package/helm/JSDraw/Scilligence.JSDraw2.Lite.js +12 -0
- package/helm/JSDraw/Scilligence.JSDraw2.Resources.js +762 -0
- package/helm/JSDraw/dojo.js +250 -0
- package/helm/JSDraw/test.html +21 -0
- package/package.json +8 -1
- package/src/monomer-library.ts +199 -0
- package/src/package-test.ts +2 -0
- package/src/package.ts +41 -13
- package/src/tests/convert-test.ts +143 -22
- package/src/tests/detectors-test.ts +97 -156
- package/src/tests/renderer-test.ts +36 -0
- package/src/tests/splitter-test.ts +22 -0
- package/src/tests/types.ts +7 -0
- package/src/utils/atomic-works.ts +218 -97
- package/src/utils/cell-renderer.ts +214 -0
- package/src/utils/chem-palette.ts +280 -0
- package/src/utils/convert.ts +25 -16
- package/src/utils/misc.ts +29 -0
- package/src/utils/multiple-sequence-alignment.ts +1 -1
- package/src/utils/notation-converter.ts +120 -84
- package/src/utils/sequence-activity-cliffs.ts +2 -2
- package/src/utils/types.ts +13 -0
- package/src/utils/utils.ts +35 -30
- package/test-Bio-34f75e5127b8-c4c5a3dc.html +259 -0
- package/files/sample_FASTA.csv +0 -66
- package/files/sample_FASTA_with_activities.csv +0 -66
- package/files/sample_MSA.csv +0 -541
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
|
+
import {MonomerLibrary} from '../monomer-library';
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
export class ChemPalette {
|
|
10
|
+
cp: StringDictionary = {};
|
|
11
|
+
isInit: boolean = false;
|
|
12
|
+
monomerLib: MonomerLibrary | null = null;
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Creates an instance of ChemPalette.
|
|
16
|
+
*
|
|
17
|
+
* @param {string} scheme Color scheme to use.
|
|
18
|
+
* @param {boolean} [grouping=false] Is grouping enabled.
|
|
19
|
+
* @memberof ChemPalette
|
|
20
|
+
*/
|
|
21
|
+
private constructor(scheme: string, grouping = false) {
|
|
22
|
+
if (scheme == 'grok')
|
|
23
|
+
this.cp = ChemPalette.getDatagrok(grouping);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Renders a 2D representation of an amino acid residue in a tooltip.
|
|
28
|
+
*
|
|
29
|
+
* @param {DG.GridCell} cell Grid cell to show tooltip over.
|
|
30
|
+
* @param {number} x x coordinate of the mouse pointer.
|
|
31
|
+
* @param {number} y y coordinate of the mouse pointer.
|
|
32
|
+
* @param {MonomerLibrary} monomerLib Monomer Library instance
|
|
33
|
+
*/
|
|
34
|
+
static showTooltip(cell: DG.GridCell, x: number, y: number, monomerLib: MonomerLibrary): void {
|
|
35
|
+
const s = cell.cell.value as string;
|
|
36
|
+
let toDisplay = [ui.divText(s)];
|
|
37
|
+
const [, aarOuter, aarInner] = ChemPalette.getColorAAPivot(s);
|
|
38
|
+
for (const aar of [aarOuter, aarInner]) {
|
|
39
|
+
if (monomerLib.monomerNames.includes(aar)) {
|
|
40
|
+
if (aar in ChemPalette.AANames)
|
|
41
|
+
toDisplay = [ui.divText(ChemPalette.AANames[aar])];
|
|
42
|
+
|
|
43
|
+
if (aar in ChemPalette.AAFullNames)
|
|
44
|
+
toDisplay = [ui.divText(ChemPalette.AANames[ChemPalette.AAFullNames[aar]])];
|
|
45
|
+
|
|
46
|
+
const options = {
|
|
47
|
+
autoCrop: true,
|
|
48
|
+
autoCropMargin: 0,
|
|
49
|
+
suppressChiralText: true,
|
|
50
|
+
};
|
|
51
|
+
const sketch = grok.chem.svgMol(monomerLib.getMonomerMol(aar), undefined, undefined, options);
|
|
52
|
+
if (toDisplay.length == 2)
|
|
53
|
+
toDisplay.push(ui.divText('Modified'));
|
|
54
|
+
|
|
55
|
+
toDisplay.push(sketch);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
ui.tooltip.show(ui.divV(toDisplay), x, y);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Returns the amino acid split into its outer part and the content inside the brackets; if that content is a number, it is omitted
|
|
63
|
+
*
|
|
64
|
+
* @param {string} c raw amino
|
|
65
|
+
* @return {[string, string]} outer and inner content
|
|
66
|
+
*/
|
|
67
|
+
static getInnerOuter(c: string): [string, string] {
|
|
68
|
+
let isInner = 0;
|
|
69
|
+
let inner = '';
|
|
70
|
+
let outer = '';
|
|
71
|
+
|
|
72
|
+
for (const char of c) {
|
|
73
|
+
if (char == '(')
|
|
74
|
+
isInner++;
|
|
75
|
+
else if (char == ')')
|
|
76
|
+
isInner--;
|
|
77
|
+
else if (isInner)
|
|
78
|
+
inner += char;
|
|
79
|
+
else
|
|
80
|
+
outer += char;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
return !isNaN(parseInt(inner)) ? [outer, ''] : [outer, inner];
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
static getColorAAPivot(monomer: string = '', scheme: 'grok' = 'grok'): [string, string, string, number] {
|
|
87
|
+
const chemPaletteInstance = ChemPalette.getPalette(scheme);
|
|
88
|
+
let [outerMonomer, innerMonomer] = ChemPalette.getInnerOuter(monomer);
|
|
89
|
+
outerMonomer = (outerMonomer.length > 6 ? `${outerMonomer.slice(0, 3)}...` : outerMonomer);
|
|
90
|
+
innerMonomer = (innerMonomer.length > 6 ? `${innerMonomer.slice(0, 3)}...` : innerMonomer);
|
|
91
|
+
|
|
92
|
+
if (monomer.length == 1 || monomer[1] == '(') {
|
|
93
|
+
const amino = monomer[0]?.toUpperCase()!;
|
|
94
|
+
return amino in chemPaletteInstance ?
|
|
95
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 1]:
|
|
96
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 1];
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
if (monomer[0] == 'd' && monomer[1]! in chemPaletteInstance) {
|
|
100
|
+
if (monomer.length == 2 || monomer[2] == '(') {
|
|
101
|
+
const amino = monomer[1]?.toUpperCase()!;
|
|
102
|
+
return amino in chemPaletteInstance ?
|
|
103
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 2]:
|
|
104
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 2];
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
if (monomer.substring(0, 3) in ChemPalette.AAFullNames) {
|
|
109
|
+
if (monomer.length == 3 || monomer[3] == '(') {
|
|
110
|
+
const amino = ChemPalette.AAFullNames[monomer.substring(0, 3)];
|
|
111
|
+
return amino in chemPaletteInstance ?
|
|
112
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 3]:
|
|
113
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 3];
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
if (monomer[0]?.toLowerCase() == monomer[0]) {
|
|
118
|
+
if (monomer.substring(1, 3) in ChemPalette.AAFullNames) {
|
|
119
|
+
if (monomer.length == 4 || monomer[4] == '(') {
|
|
120
|
+
const amino = ChemPalette.AAFullNames[monomer.substring(1, 3)];
|
|
121
|
+
return amino in chemPaletteInstance ?
|
|
122
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 4]:
|
|
123
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 4];
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 0];
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
static colourPalette: {[key: string]: string[]} = {
|
|
132
|
+
'orange': ['rgb(255,187,120)', 'rgb(245,167,100)', 'rgb(235,137,70)', 'rgb(205, 111, 71)'],
|
|
133
|
+
'all_green': ['rgb(44,160,44)', 'rgb(74,160,74)', 'rgb(23,103,57)', 'rgb(30,110,96)', 'rgb(60,131,95)',
|
|
134
|
+
'rgb(24,110,79)', 'rgb(152,223,138)', 'rgb(182, 223, 138)', 'rgb(152, 193, 138)'],
|
|
135
|
+
'all_blue': ['rgb(31,119,180)', 'rgb(23,190,207)', 'rgb(122, 102, 189)', 'rgb(158,218,229)', 'rgb(141, 124, 217)',
|
|
136
|
+
'rgb(31, 120, 150)'],
|
|
137
|
+
'magenta': ['rgb(162,106,192)', 'rgb(197,165,224)', 'rgb(208,113,218)'],
|
|
138
|
+
'red': ['rgb(214,39,40)', 'rgb(255,152,150)'],
|
|
139
|
+
'st_blue': ['rgb(23,190,207)', 'rgb(158,218,229)', 'rgb(31,119,180)'],
|
|
140
|
+
'dark_blue': ['rgb(31,119,180)', 'rgb(31, 120, 150)'],
|
|
141
|
+
'light_blue': ['rgb(23,190,207)', 'rgb(158,218,229)', 'rgb(108, 218, 229)', 'rgb(23,190,227)'],
|
|
142
|
+
'lilac_blue': ['rgb(124,102,211)', 'rgb(149,134,217)', 'rgb(97, 81, 150)'],
|
|
143
|
+
'dark_green': ['rgb(23,103,57)', 'rgb(30,110,96)', 'rgb(60,131,95)', 'rgb(24,110,79)'],
|
|
144
|
+
'green': ['rgb(44,160,44)', 'rgb(74,160,74)'],
|
|
145
|
+
'light_green': ['rgb(152,223,138)', 'rgb(182, 223, 138)', 'rgb(152, 193, 138)'],
|
|
146
|
+
'st_green': ['rgb(44,160,44)', 'rgb(152,223,138)', 'rgb(39, 174, 96)', 'rgb(74,160,74)'],
|
|
147
|
+
'pink': ['rgb(247,182,210)'],
|
|
148
|
+
'brown': ['rgb(140,86,75)', 'rgb(102, 62, 54)'],
|
|
149
|
+
'gray': ['rgb(127,127,127)', 'rgb(199,199,199)', 'rgb(196,156,148)', 'rgb(222, 222, 180)'],
|
|
150
|
+
'yellow': ['rgb(188,189,34)'],
|
|
151
|
+
'white': ['rgb(230,230,230)'],
|
|
152
|
+
};
|
|
153
|
+
|
|
154
|
+
static grokGroups: {[key: string]: string[]} = {
|
|
155
|
+
'yellow': ['C', 'U'],
|
|
156
|
+
'red': ['G', 'P'],
|
|
157
|
+
'all_green': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
|
|
158
|
+
'light_blue': ['R', 'H', 'K'],
|
|
159
|
+
'dark_blue': ['D', 'E'],
|
|
160
|
+
'orange': ['S', 'T', 'N', 'Q'],
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
static undefinedColor = 'rgb(100,100,100)';
|
|
164
|
+
|
|
165
|
+
static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false): StringDictionary {
|
|
166
|
+
const palette: { [key: string]: string } = {};
|
|
167
|
+
const groups = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
|
|
168
|
+
let currentGroup = 0;
|
|
169
|
+
for (const [color, monomers] of Object.entries(dt)) {
|
|
170
|
+
monomers.forEach((monomer, index) => {
|
|
171
|
+
palette[grouping ? groups[currentGroup] : monomer] = ChemPalette.colourPalette[color][simplified ? 0 : index];
|
|
172
|
+
});
|
|
173
|
+
currentGroup++;
|
|
174
|
+
}
|
|
175
|
+
return palette;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
static AANames: StringDictionary = {
|
|
179
|
+
'G': 'Glycine',
|
|
180
|
+
'L': 'Leucine',
|
|
181
|
+
'Y': 'Tyrosine',
|
|
182
|
+
'S': 'Serine',
|
|
183
|
+
'E': 'Glutamic acid',
|
|
184
|
+
'Q': 'Glutamine',
|
|
185
|
+
'D': 'Aspartic acid',
|
|
186
|
+
'N': 'Asparagine',
|
|
187
|
+
'F': 'Phenylalanine',
|
|
188
|
+
'A': 'Alanine',
|
|
189
|
+
'K': 'Lysine',
|
|
190
|
+
'R': 'Arginine',
|
|
191
|
+
'H': 'Histidine',
|
|
192
|
+
'C': 'Cysteine',
|
|
193
|
+
'V': 'Valine',
|
|
194
|
+
'P': 'Proline',
|
|
195
|
+
'W': 'Tryptophan',
|
|
196
|
+
'I': 'Isoleucine',
|
|
197
|
+
'M': 'Methionine',
|
|
198
|
+
'T': 'Threonine',
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
static AASmiles: StringDictionary = {
|
|
202
|
+
'G': 'NCC(=O)O',
|
|
203
|
+
'L': 'N[C@H](CC(C)C)C(=O)O',
|
|
204
|
+
'Y': 'NC(CC1=CC=C(O)C=C1)C(=O)O',
|
|
205
|
+
'S': 'NC(CO)C(=O)O',
|
|
206
|
+
'E': 'N[C@@H](CCC(O)=O)C(=O)O',
|
|
207
|
+
'Q': 'N[C@@H](CCC(N)=O)C(=O)O',
|
|
208
|
+
'D': 'N[C@@H](CC(O)=O)C(=O)O',
|
|
209
|
+
'N': 'N[C@@H](CC(N)=O)C(=O)O',
|
|
210
|
+
'F': 'NC(CC1=CC=CC=C1)C(=O)O',
|
|
211
|
+
'A': 'N[C@H](C)C(=O)O',
|
|
212
|
+
'K': 'NC(CCCCN)C(=O)O',
|
|
213
|
+
'R': 'N[C@H](CCCNC(=N)C)C(=O)O',
|
|
214
|
+
'H': 'NC(CC1=CN=C[N]1)C(=O)O',
|
|
215
|
+
'C': 'N[C@@H](CS)C(=O)O',
|
|
216
|
+
'V': 'NC(C(C)C)C(=O)O',
|
|
217
|
+
'P': 'N(CCC1)C1C(=O)O',
|
|
218
|
+
'W': 'N[C@@H](Cc1c2ccccc2n([H])c1)C(=O)O',
|
|
219
|
+
'I': 'N[C@H]([C@H](C)CC)C(=O)O',
|
|
220
|
+
'M': 'NC(CCSC)C(=O)O',
|
|
221
|
+
'T': 'NC(C(O)C)C(=O)O',
|
|
222
|
+
};
|
|
223
|
+
|
|
224
|
+
static AASmilesTruncated: StringDictionary = {
|
|
225
|
+
'G': '*C*',
|
|
226
|
+
'L': 'CC(C)C[C@H](*)*',
|
|
227
|
+
'Y': 'C1=CC(=CC=C1CC(*)*)O',
|
|
228
|
+
'S': 'OCC(*)C*',
|
|
229
|
+
'E': '*[C@@H](CCC(O)=O)*',
|
|
230
|
+
'Q': '*N[C@@H](CCC(N)=O)*',
|
|
231
|
+
'D': '*[C@@H](CC(O)=O)*',
|
|
232
|
+
'N': '*[C@@H](CC(N)=O)*',
|
|
233
|
+
'F': 'C1=CC=C(C=C1)CC(*)*',
|
|
234
|
+
'A': 'C[C@H](*)*',
|
|
235
|
+
'K': 'C(CCN)CC(*)*',
|
|
236
|
+
'R': '*[C@H](CCCNC(=N)C)*',
|
|
237
|
+
'H': 'C1=C(NC=N1)CC(*)*',
|
|
238
|
+
'C': 'C([C@@H](*)*)S',
|
|
239
|
+
'V': 'CC(C)C(*)*',
|
|
240
|
+
'P': 'C1CCN(*)C1*',
|
|
241
|
+
'W': '*[C@@H](Cc1c2ccccc2n([H])c1)*',
|
|
242
|
+
'I': 'CC[C@H](C)[C@H](*)*',
|
|
243
|
+
'M': 'CSCCC(*)*',
|
|
244
|
+
'T': 'CC(O)C(*)*',
|
|
245
|
+
};
|
|
246
|
+
|
|
247
|
+
static AAFullNames: StringDictionary = {
|
|
248
|
+
'Ala': 'A',
|
|
249
|
+
'Arg': 'R',
|
|
250
|
+
'Asn': 'N',
|
|
251
|
+
'Asp': 'D',
|
|
252
|
+
'Cys': 'C',
|
|
253
|
+
'Gln': 'Q',
|
|
254
|
+
'Glu': 'E',
|
|
255
|
+
'Gly': 'G',
|
|
256
|
+
'His': 'H',
|
|
257
|
+
'Ile': 'I',
|
|
258
|
+
'Leu': 'L',
|
|
259
|
+
'Lys': 'K',
|
|
260
|
+
'Met': 'M',
|
|
261
|
+
'Phe': 'F',
|
|
262
|
+
'Pro': 'P',
|
|
263
|
+
'Ser': 'S',
|
|
264
|
+
'Thr': 'T',
|
|
265
|
+
'Trp': 'W',
|
|
266
|
+
'Tyr': 'Y',
|
|
267
|
+
'Val': 'V',
|
|
268
|
+
};
|
|
269
|
+
|
|
270
|
+
static getDatagrok(grouping = false): StringDictionary {
|
|
271
|
+
return ChemPalette.makePalette(ChemPalette.grokGroups, false, grouping);
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
static getPalette(scheme: 'grok'): StringDictionary {
|
|
275
|
+
if (scheme == 'grok')
|
|
276
|
+
return ChemPalette.getDatagrok();
|
|
277
|
+
else
|
|
278
|
+
throw new Error(`ChemPalette: scheme \`${scheme}\` does not exist`);
|
|
279
|
+
}
|
|
280
|
+
}
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
|
-
import {NotationConverter} from './notation-converter';
|
|
3
|
+
import {NotationConverter, NOTATION} from './notation-converter';
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* Converts notations of a Macromolecule column
|
|
@@ -8,27 +8,36 @@ import {NotationConverter} from './notation-converter';
|
|
|
8
8
|
* @param {DG.Column} col Column with 'Macromolecule' semantic type
|
|
9
9
|
*/
|
|
10
10
|
export function convert(col: DG.Column): void {
|
|
11
|
-
const
|
|
11
|
+
const converter = new NotationConverter(col);
|
|
12
|
+
const current: NOTATION = converter.sourceNotation;
|
|
12
13
|
//TODO: read all notations
|
|
13
|
-
const
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
const notations = [
|
|
15
|
+
NOTATION.FASTA,
|
|
16
|
+
NOTATION.SEPARATOR,
|
|
17
|
+
NOTATION.HELM
|
|
17
18
|
];
|
|
18
|
-
const
|
|
19
|
+
const filtered = notations.filter((e) => e !== current);
|
|
20
|
+
const targetNotationInput = ui.choiceInput('Convert to', filtered[0], filtered);
|
|
21
|
+
|
|
22
|
+
const separatorInput = ui.choiceInput('separator', '-', ['-', '.', '/']);
|
|
19
23
|
|
|
20
24
|
ui.dialog('Convert sequence')
|
|
21
|
-
.add(
|
|
22
|
-
ui.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
.add(ui.div([
|
|
26
|
+
ui.h1('current notation'),
|
|
27
|
+
ui.div(current),
|
|
28
|
+
targetNotationInput.root
|
|
29
|
+
]))
|
|
30
|
+
.add(ui.div([
|
|
31
|
+
ui.h1('Separator'),
|
|
32
|
+
separatorInput,
|
|
33
|
+
|
|
34
|
+
]))
|
|
28
35
|
.onOK(() => {
|
|
29
36
|
//TODO: create new converted column
|
|
30
|
-
const
|
|
31
|
-
const
|
|
37
|
+
//const targetNotation: NOTATION = strToEnum<NOTATION>(NOTATION, targetNotationInput.value)!;
|
|
38
|
+
const targetNotation: NOTATION = targetNotationInput.value as NOTATION;
|
|
39
|
+
const separator = separatorInput.value!;
|
|
40
|
+
const newColumn = converter.convert(targetNotation, separator);
|
|
32
41
|
col.dataFrame.columns.add(newColumn);
|
|
33
42
|
})
|
|
34
43
|
.show();
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import * as C from './constants';
|
|
4
|
+
|
|
5
|
+
export function stringToBool(str: string): boolean {
|
|
6
|
+
return str === 'true' ? true : false;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export function getSeparator(col: DG.Column<string>): string {
|
|
10
|
+
const separator = col.tags[C.TAGS.SEPARATOR];
|
|
11
|
+
if (separator)
|
|
12
|
+
return separator as string;
|
|
13
|
+
|
|
14
|
+
const defaultSeparators = ['.', '-', ' '];
|
|
15
|
+
const categories = col.categories;
|
|
16
|
+
const catLen = categories.length;
|
|
17
|
+
for (const potentialSeparator of defaultSeparators) {
|
|
18
|
+
if (categories.filter((sequence) => sequence.includes(potentialSeparator)).length == catLen)
|
|
19
|
+
return potentialSeparator;
|
|
20
|
+
}
|
|
21
|
+
return separator as string ?? '';
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export function getTypedArrayConstructor(
|
|
25
|
+
maxNum: number): Uint8ArrayConstructor | Uint16ArrayConstructor | Uint32ArrayConstructor {
|
|
26
|
+
return maxNum < 256 ? Uint8Array :
|
|
27
|
+
maxNum < 65536 ? Uint16Array :
|
|
28
|
+
Uint32Array;
|
|
29
|
+
}
|
|
@@ -62,7 +62,7 @@ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<D
|
|
|
62
62
|
const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
|
|
63
63
|
|
|
64
64
|
tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
|
|
65
|
-
tgtCol.semType =
|
|
65
|
+
tgtCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
66
66
|
return tgtCol;
|
|
67
67
|
}
|
|
68
68
|
|
|
@@ -1,131 +1,167 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
2
3
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
4
|
+
/** enum type to simplify setting "user-friendly" notation if necessary */
|
|
5
|
+
export const enum NOTATION {
|
|
6
|
+
FASTA = 'fasta',
|
|
7
|
+
SEPARATOR = 'separator',
|
|
8
|
+
HELM = 'helm'
|
|
9
|
+
}
|
|
9
10
|
|
|
11
|
+
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
10
12
|
export class NotationConverter {
|
|
11
13
|
private _sourceColumn: DG.Column; // the column to be converted
|
|
12
|
-
private
|
|
13
|
-
private _sourceNotation:
|
|
14
|
-
|
|
14
|
+
private _sourceUnits: string; // units, of the form fasta:SEQ:NT, etc.
|
|
15
|
+
private _sourceNotation: NOTATION; // current notation (without :SEQ:NT, etc.)
|
|
16
|
+
|
|
17
|
+
private get sourceUnits(): string { return this._sourceUnits; }
|
|
15
18
|
|
|
16
19
|
private get sourceColumn(): DG.Column { return this._sourceColumn; }
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
public
|
|
27
|
-
|
|
28
|
-
public
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
20
|
+
|
|
21
|
+
public get sourceNotation(): NOTATION { return this._sourceNotation; }
|
|
22
|
+
|
|
23
|
+
public isFasta(): boolean { return this.sourceNotation === NOTATION.FASTA; }
|
|
24
|
+
|
|
25
|
+
public isSeparator(): boolean { return this.sourceNotation === NOTATION.SEPARATOR; }
|
|
26
|
+
|
|
27
|
+
public isHelm(): boolean { return this.sourceNotation === NOTATION.HELM; }
|
|
28
|
+
|
|
29
|
+
public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }
|
|
30
|
+
|
|
31
|
+
public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }
|
|
32
|
+
|
|
33
|
+
public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }
|
|
34
|
+
|
|
35
|
+
// TODO: isRna
|
|
36
|
+
public isRna(): boolean { return this.sourceUnits.toLowerCase().endsWith('nt'); }
|
|
37
|
+
|
|
38
|
+
public isPeptide(): boolean { return this.sourceUnits.toLowerCase().endsWith('pt'); }
|
|
39
|
+
|
|
40
|
+
/** Associate notation types with the corresponding units */
|
|
41
|
+
/**
|
|
42
|
+
* @return {NOTATION} notation associated with the units type
|
|
43
|
+
*/
|
|
44
|
+
private determineSourceNotation(): NOTATION {
|
|
45
|
+
if (this.sourceUnits.toLowerCase().startsWith('fasta'))
|
|
46
|
+
return NOTATION.FASTA;
|
|
47
|
+
else if (this.sourceUnits.toLowerCase().startsWith('separator'))
|
|
48
|
+
return NOTATION.SEPARATOR;
|
|
35
49
|
else
|
|
36
50
|
// TODO: handle possible exceptions
|
|
37
|
-
return
|
|
51
|
+
return NOTATION.HELM;
|
|
38
52
|
}
|
|
39
53
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const
|
|
43
|
-
const
|
|
44
|
-
const
|
|
54
|
+
// TODO: write doc
|
|
55
|
+
private getNewColumn(targetNotation: NOTATION): DG.Column {
|
|
56
|
+
const col = this.sourceColumn;
|
|
57
|
+
const len = col.length;
|
|
58
|
+
const name = targetNotation + '(' + col.name + ')';
|
|
59
|
+
const newColName = col.dataFrame.columns.getUnusedName(name);
|
|
60
|
+
// dummy code
|
|
61
|
+
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
|
|
45
62
|
newColumn.semType = 'Macromolecule';
|
|
63
|
+
const newUnits = this.sourceUnits.replace(this.sourceNotation.toString(), targetNotation.toString());
|
|
64
|
+
newColumn.setTag(DG.TAGS.UNITS, newUnits);
|
|
65
|
+
// TODO: determine all the qualifiers (units, ...), perhaps, using detectors
|
|
46
66
|
return newColumn;
|
|
47
67
|
}
|
|
48
68
|
|
|
49
|
-
|
|
69
|
+
// TODO: write doc
|
|
70
|
+
private convertFastaToSeparator(separator: string): DG.Column {
|
|
50
71
|
// TODO: implementation
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
72
|
+
// * specify separator
|
|
73
|
+
// * fasta gap symbol should NOT be considered '-' only, but set as a parameter
|
|
74
|
+
// * in fasta every position is a monomer (no multi-char monomers), call splitToMonomers() method
|
|
75
|
+
// * in the resulting jagged array, every gap symbol is to be replaced by
|
|
76
|
+
// the empty string, while the monomers, to be separated by the separator
|
|
77
|
+
// (specified as a parameter)
|
|
78
|
+
// On splitToMonomers(): /libraries/bio/src/viewers/WebLogo --> getSplitter
|
|
79
|
+
|
|
80
|
+
const gapSymbol = '-'; // to be specified as an argument
|
|
81
|
+
const splitterAsFasta = WebLogo.splitterAsFasta;
|
|
82
|
+
const newColumn = this.getNewColumn(NOTATION.SEPARATOR);
|
|
83
|
+
newColumn.init((idx: number) => {
|
|
84
|
+
const sourcePolymer = this.sourceColumn.get(idx);
|
|
85
|
+
const monomersArray = splitterAsFasta(sourcePolymer);
|
|
86
|
+
for (let i = 0; i < monomersArray.length; i++) {
|
|
87
|
+
if (monomersArray[i] === gapSymbol)
|
|
88
|
+
monomersArray[i] = '';
|
|
89
|
+
}
|
|
90
|
+
return monomersArray.join(separator);
|
|
91
|
+
});
|
|
92
|
+
return newColumn;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
private wrapRnaNucleotideToHelm(monomer: string) {
|
|
96
|
+
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
private convertFastaToHelm(): DG.Column {
|
|
100
|
+
const gapSymbol = '-'; // to be specified as an argument
|
|
101
|
+
const splitterAsFasta = WebLogo.splitterAsFasta;
|
|
102
|
+
const newColumn = this.getNewColumn(NOTATION.HELM);
|
|
103
|
+
newColumn.init((idx: number) => {
|
|
104
|
+
const sourcePolymer = this.sourceColumn.get(idx);
|
|
105
|
+
const monomersArray = splitterAsFasta(sourcePolymer);
|
|
106
|
+
for (let i = 0; i < monomersArray.length; i++) {
|
|
107
|
+
// // TODO: handle gap symbols -- replace by asterisk
|
|
108
|
+
// if (monomersArray[i] === gapSymbol)
|
|
109
|
+
// monomersArray[i] = '*';
|
|
110
|
+
// else
|
|
111
|
+
}
|
|
112
|
+
// TODO: determine conditionally (if isDna(), or isRna(), or isPeptide()) the template
|
|
113
|
+
return monomersArray.join('');
|
|
114
|
+
});
|
|
55
115
|
return newColumn;
|
|
56
116
|
}
|
|
57
117
|
|
|
58
118
|
private convertSeparatorToFasta(): DG.Column {
|
|
59
119
|
// TODO: implementation
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
120
|
+
// * similarly to fasta2separator, divide string into monomers
|
|
121
|
+
// * adjacent separators denote a gap (symbol to be specified)
|
|
122
|
+
// * the monomers MUST be single-character ones, otherwise forbid
|
|
123
|
+
// conversion
|
|
124
|
+
//getSplitterWithSeparator
|
|
125
|
+
return this.getNewColumn(NOTATION.FASTA);
|
|
65
126
|
}
|
|
66
127
|
|
|
67
128
|
private convertSeparatorToHelm(): DG.Column {
|
|
68
129
|
// TODO: implementation
|
|
69
|
-
|
|
70
|
-
const newColName = 'converted';
|
|
71
|
-
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('sep2helm'));
|
|
72
|
-
newColumn.semType = 'Macromolecule';
|
|
73
|
-
return newColumn;
|
|
130
|
+
return this.getNewColumn(NOTATION.HELM);
|
|
74
131
|
}
|
|
75
132
|
|
|
76
133
|
private convertHelmToFasta(): DG.Column {
|
|
77
134
|
// TODO: implementation
|
|
78
|
-
|
|
79
|
-
const newColName = 'converted';
|
|
80
|
-
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2fasta'));
|
|
81
|
-
newColumn.semType = 'Macromolecule';
|
|
82
|
-
return newColumn;
|
|
135
|
+
return this.getNewColumn(NOTATION.FASTA);
|
|
83
136
|
}
|
|
84
137
|
|
|
85
138
|
private convertHelmToSeparator(): DG.Column {
|
|
86
|
-
// TODO:
|
|
87
|
-
|
|
88
|
-
const newColName = 'converted';
|
|
89
|
-
const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill('helm2sep'));
|
|
90
|
-
newColumn.semType = 'Macromolecule';
|
|
91
|
-
return newColumn;
|
|
139
|
+
// TODO: implementation
|
|
140
|
+
return this.getNewColumn(NOTATION.SEPARATOR);
|
|
92
141
|
}
|
|
93
142
|
|
|
143
|
+
/** Dispatcher method for notation conversion */
|
|
94
144
|
// TODO: write the bodies of converter methods
|
|
95
|
-
public convert(
|
|
96
|
-
if (
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
else if (
|
|
102
|
-
this.sourceNotation == this._fasta &&
|
|
103
|
-
this.targetNotation == this._helm
|
|
104
|
-
)
|
|
145
|
+
public convert(targetNotation: NOTATION, separator: string | null): DG.Column {
|
|
146
|
+
if (this.sourceNotation === targetNotation)
|
|
147
|
+
throw new Error('Target notation is not specified');
|
|
148
|
+
if (this.isFasta() && this.toSeparator(targetNotation))
|
|
149
|
+
return this.convertFastaToSeparator(separator!); // there is the only place where a separator is needed
|
|
150
|
+
else if (this.isFasta() && this.toHelm(targetNotation))
|
|
105
151
|
return this.convertFastaToHelm();
|
|
106
|
-
else if (
|
|
107
|
-
this.sourceNotation == this._separator &&
|
|
108
|
-
this.targetNotation == this._fasta
|
|
109
|
-
)
|
|
152
|
+
else if (this.isSeparator() && this.toFasta(targetNotation))
|
|
110
153
|
return this.convertSeparatorToFasta();
|
|
111
|
-
else if (
|
|
112
|
-
this.sourceNotation == this._separator &&
|
|
113
|
-
this.targetNotation == this._helm
|
|
114
|
-
)
|
|
154
|
+
else if (this.isSeparator() && this.toHelm(targetNotation))
|
|
115
155
|
return this.convertSeparatorToHelm();
|
|
116
|
-
else if (
|
|
117
|
-
this.sourceNotation == this._helm &&
|
|
118
|
-
this.targetNotation == this._fasta
|
|
119
|
-
)
|
|
156
|
+
else if (this.isHelm() && this.toFasta(targetNotation))
|
|
120
157
|
return this.convertHelmToFasta();
|
|
121
158
|
else
|
|
122
159
|
return this.convertHelmToSeparator();
|
|
123
160
|
}
|
|
124
161
|
|
|
125
|
-
public constructor(col: DG.Column
|
|
162
|
+
public constructor(col: DG.Column) {
|
|
126
163
|
this._sourceColumn = col;
|
|
127
|
-
this.
|
|
164
|
+
this._sourceUnits = this._sourceColumn.tags[DG.TAGS.UNITS];
|
|
128
165
|
this._sourceNotation = this.determineSourceNotation();
|
|
129
|
-
this._targetNotation = target;
|
|
130
166
|
}
|
|
131
167
|
}
|
|
@@ -2,9 +2,9 @@ import {IDrawTooltipParams} from '@datagrok-libraries/ml/src/viewers/activity-cl
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
|
|
5
|
-
import {
|
|
5
|
+
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
6
6
|
|
|
7
|
-
export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null>{
|
|
7
|
+
export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null> {
|
|
8
8
|
const stringArray = col.toList();
|
|
9
9
|
const distances = new Array(stringArray.length).fill(0.0);
|
|
10
10
|
for (let i = 0; i < stringArray.length; ++i)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
export type DataFrameDict = {[key: string]: DG.DataFrame};
|
|
4
|
+
|
|
5
|
+
export namespace BarChart {
|
|
6
|
+
export type BarPart = {colName : string, aaName : string};
|
|
7
|
+
export type BarStatsObject = {name: string, count: number, selectedCount: number};
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export type UTypedArray = Uint8Array | Uint16Array | Uint32Array;
|
|
11
|
+
//AAR: (Position: (index: indexList))
|
|
12
|
+
export type SubstitutionsInfo = Map<string, Map<string, Map<number, number[] | UTypedArray>>>;
|
|
13
|
+
export type SelectionObject = {[postiton: string]: string[]};
|