@datagrok/peptides 0.8.7 → 0.8.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -12
- package/setup.cmd +18 -0
- package/src/model.ts +568 -90
- package/src/monomer-library.ts +19 -14
- package/src/package-test.ts +4 -4
- package/src/package.ts +58 -35
- package/src/peptides.ts +225 -105
- package/src/tests/peptide-space-test.ts +1 -1
- package/src/tests/peptides-tests.ts +14 -75
- package/src/tests/utils.ts +3 -2
- package/src/utils/cell-renderer.ts +120 -72
- package/src/utils/chem-palette.ts +100 -163
- package/src/utils/multiple-sequence-alignment.ts +33 -2
- package/src/utils/multivariate-analysis.ts +77 -0
- package/src/utils/peptide-similarity-space.ts +18 -28
- package/src/viewers/logo-viewer.ts +5 -4
- package/src/viewers/sar-viewer.ts +89 -129
- package/src/viewers/stacked-barchart-viewer.ts +348 -359
- package/src/viewers/subst-viewer.ts +103 -70
- package/src/widgets/analyze-peptides.ts +43 -25
- package/src/widgets/manual-alignment.ts +6 -4
- package/src/widgets/multiple-sequence-alignment.ts +9 -0
- package/src/widgets/peptide-molecule.ts +8 -6
- package/src/widgets/subst-table.ts +63 -0
- package/src/workers/dimensionality-reducer.ts +1 -1
- package/src/describe.ts +0 -535
- package/src/utils/split-aligned.ts +0 -72
|
@@ -3,15 +3,13 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
|
+
import {MonomerLibrary} from '../monomer-library';
|
|
7
|
+
|
|
6
8
|
|
|
7
|
-
/**
|
|
8
|
-
* Chem palette class.
|
|
9
|
-
*
|
|
10
|
-
* @export
|
|
11
|
-
* @class ChemPalette
|
|
12
|
-
*/
|
|
13
9
|
export class ChemPalette {
|
|
14
10
|
cp: StringDictionary = {};
|
|
11
|
+
isInit: boolean = false;
|
|
12
|
+
monomerLib: MonomerLibrary | null = null;
|
|
15
13
|
|
|
16
14
|
/**
|
|
17
15
|
* Creates an instance of ChemPalette.
|
|
@@ -20,7 +18,7 @@ export class ChemPalette {
|
|
|
20
18
|
* @param {boolean} [grouping=false] Is grouping enabled.
|
|
21
19
|
* @memberof ChemPalette
|
|
22
20
|
*/
|
|
23
|
-
constructor(scheme: string, grouping = false) {
|
|
21
|
+
private constructor(scheme: string, grouping = false) {
|
|
24
22
|
if (scheme == 'grok')
|
|
25
23
|
this.cp = ChemPalette.getDatagrok(grouping);
|
|
26
24
|
}
|
|
@@ -32,102 +30,112 @@ export class ChemPalette {
|
|
|
32
30
|
* @param {number} x x coordinate of the mouse pointer.
|
|
33
31
|
* @param {number} y y coordinate of the mouse pointer.
|
|
34
32
|
*/
|
|
35
|
-
showTooltip(cell: DG.GridCell, x: number, y: number) {
|
|
33
|
+
static showTooltip(cell: DG.GridCell, x: number, y: number, monomerLib: MonomerLibrary) {
  // The raw cell text is what gets shown when no monomer is recognised.
  const rawValue = cell.cell.value as string;
  let tooltipParts = [ui.divText(rawValue)];
  const pivot = ChemPalette.getColorAAPivot(rawValue);

  // pivot[1] is the outer monomer, pivot[2] the content found in parentheses.
  for (const residue of [pivot[1], pivot[2]]) {
    if (!monomerLib.monomerNames.includes(residue))
      continue;

    // A known one-letter code replaces everything collected so far with the residue name.
    if (residue in ChemPalette.AANames)
      tooltipParts = [ui.divText(ChemPalette.AANames[residue])];

    // A known three-letter code is mapped to its one-letter code first.
    if (residue in ChemPalette.AAFullNames)
      tooltipParts = [ui.divText(ChemPalette.AANames[ChemPalette.AAFullNames[residue]])];

    const renderOptions = {
      autoCrop: true,
      autoCropMargin: 0,
      suppressChiralText: true,
    };
    const structure = grok.chem.svgMol(monomerLib.getMonomerMol(residue), undefined, undefined, renderOptions);

    // Two entries are already queued (a text entry plus one sketch), so label the next sketch.
    if (tooltipParts.length == 2)
      tooltipParts.push(ui.divText('Modified'));

    tooltipParts.push(structure);
  }

  ui.tooltip.show(ui.divV(tooltipParts), x, y);
}
|
|
56
67
|
|
|
57
68
|
/**
 * Returns the amino acid split into the text outside parentheses and the text inside them.
 * If the inner text parses as a number (per `parseInt`), it is omitted.
 *
 * @param {string} c raw amino
 * @return {[string, string]} outer and inner content
 */
static getInnerOuter(c: string): [string, string] {
  let depth = 0;
  const outerChars: string[] = [];
  const innerChars: string[] = [];

  for (const symbol of c) {
    switch (symbol) {
    case '(':
      depth++;
      break;
    case ')':
      depth--;
      break;
    default:
      // Any non-zero depth counts as "inside the brackets".
      (depth ? innerChars : outerChars).push(symbol);
    }
  }

  const outer = outerChars.join('');
  const inner = innerChars.join('');
  if (isNaN(parseInt(inner)))
    return [outer, inner];

  return [outer, ''];
}
|
|
66
92
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
[
|
|
93
|
+
/**
 * Resolves the palette colour and the display parts of a monomer.
 *
 * Notations are checked in this order, each optionally followed by `(`:
 * a one-letter code, a `d`-prefixed one-letter code, a three-letter code from `AAFullNames`,
 * and a three-letter code preceded by one character that equals its own lower-case form.
 *
 * @param {string} [monomer=''] raw monomer text
 * @param {'grok'} [scheme='grok'] palette scheme passed to `getPalette`
 * @return {[string, string, string, number]} colour; the one-letter code when it is in the palette,
 * otherwise the outer part (shortened to 3 characters plus `...` when longer than 6); the inner part
 * (shortened the same way); and the length of the recognised prefix (0 when nothing matched)
 */
static getColorAAPivot(monomer: string = '', scheme: 'grok' = 'grok'): [string, string, string, number] {
  const chemPaletteInstance = ChemPalette.getPalette(scheme);
  let [outerMonomer, innerMonomer] = ChemPalette.getInnerOuter(monomer);
  outerMonomer = (outerMonomer.length > 6 ? `${outerMonomer.slice(0, 3)}...` : outerMonomer);
  innerMonomer = (innerMonomer.length > 6 ? `${innerMonomer.slice(0, 3)}...` : innerMonomer);

  if (monomer.length == 1 || monomer[1] == '(') {
    const amino = monomer[0]?.toUpperCase()!;
    return amino in chemPaletteInstance ?
      [chemPaletteInstance[amino], amino, innerMonomer, 1]:
      [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 1];
  }

  if (monomer[0] == 'd' && monomer[1]! in chemPaletteInstance) {
    if (monomer.length == 2 || monomer[2] == '(') {
      const amino = monomer[1]?.toUpperCase()!;
      return amino in chemPaletteInstance ?
        [chemPaletteInstance[amino], amino, innerMonomer, 2]:
        [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 2];
    }
  }

  if (monomer.substring(0, 3) in ChemPalette.AAFullNames) {
    if (monomer.length == 3 || monomer[3] == '(') {
      const amino = ChemPalette.AAFullNames[monomer.substring(0, 3)];
      return amino in chemPaletteInstance ?
        [chemPaletteInstance[amino], amino, innerMonomer, 3]:
        [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 3];
    }
  }

  if (monomer[0]?.toLowerCase() == monomer[0]) {
    // The three-letter code occupies characters 1..3, so the slice must end at index 4.
    // The previous `substring(1, 3)` produced two characters, which can never be an AAFullNames key.
    const fullName = monomer.substring(1, 4);
    if (fullName in ChemPalette.AAFullNames) {
      if (monomer.length == 4 || monomer[4] == '(') {
        const amino = ChemPalette.AAFullNames[fullName];
        return amino in chemPaletteInstance ?
          [chemPaletteInstance[amino], amino, innerMonomer, 4]:
          [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 4];
      }
    }
  }

  return [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 0];
}
|
|
111
138
|
|
|
112
|
-
/**
|
|
113
|
-
* Get color pivot.
|
|
114
|
-
*
|
|
115
|
-
* @param c
|
|
116
|
-
* @returns
|
|
117
|
-
*/
|
|
118
|
-
getColorPivot(c = ''): [string, number] {
|
|
119
|
-
//TODO: merge with getColorAAPivot?
|
|
120
|
-
const [color,, pivot] = this.getColorAAPivot(c);
|
|
121
|
-
return [color, pivot];
|
|
122
|
-
};
|
|
123
|
-
|
|
124
|
-
/**
|
|
125
|
-
* Color palette
|
|
126
|
-
*
|
|
127
|
-
* @static
|
|
128
|
-
* @type {{[key: string]: string[]}}
|
|
129
|
-
* @memberof ChemPalette
|
|
130
|
-
*/
|
|
131
139
|
static colourPalette: {[key: string]: string[]} = {
|
|
132
140
|
'orange': ['rgb(255,187,120)', 'rgb(245,167,100)', 'rgb(235,137,70)', 'rgb(205, 111, 71)'],
|
|
133
141
|
'all_green': ['rgb(44,160,44)', 'rgb(74,160,74)', 'rgb(23,103,57)', 'rgb(30,110,96)', 'rgb(60,131,95)',
|
|
@@ -149,15 +157,8 @@ export class ChemPalette {
|
|
|
149
157
|
'gray': ['rgb(127,127,127)', 'rgb(199,199,199)', 'rgb(196,156,148)', 'rgb(222, 222, 180)'],
|
|
150
158
|
'yellow': ['rgb(188,189,34)'],
|
|
151
159
|
'white': ['rgb(230,230,230)'],
|
|
152
|
-
}
|
|
160
|
+
};
|
|
153
161
|
|
|
154
|
-
/**
|
|
155
|
-
* Grok color scheme groups.
|
|
156
|
-
*
|
|
157
|
-
* @static
|
|
158
|
-
* @type {{[key: string]: string[]}}
|
|
159
|
-
* @memberof ChemPalette
|
|
160
|
-
*/
|
|
161
162
|
static grokGroups: {[key: string]: string[]} = {
|
|
162
163
|
'yellow': ['C', 'U'],
|
|
163
164
|
'red': ['G', 'P'],
|
|
@@ -165,40 +166,11 @@ export class ChemPalette {
|
|
|
165
166
|
'light_blue': ['R', 'H', 'K'],
|
|
166
167
|
'dark_blue': ['D', 'E'],
|
|
167
168
|
'orange': ['S', 'T', 'N', 'Q'],
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
/**
|
|
171
|
-
* Lesk color scheme groups.
|
|
172
|
-
*
|
|
173
|
-
* @static
|
|
174
|
-
* @type {{[key: string]: string[]}}
|
|
175
|
-
* @memberof ChemPalette
|
|
176
|
-
*/
|
|
177
|
-
static leskGroups: {[key: string]: string[]} = {
|
|
178
|
-
'orange': ['G', 'A', 'S', 'T'],
|
|
179
|
-
'all_green': ['C', 'V', 'I', 'L', 'P', 'F', 'Y', 'M', 'W'],
|
|
180
|
-
'magenta': ['N', 'Q', 'H'],
|
|
181
|
-
'red': ['D', 'E'],
|
|
182
|
-
'all_blue': ['K', 'R'],
|
|
183
|
-
}
|
|
169
|
+
};
|
|
184
170
|
|
|
185
|
-
/**
|
|
186
|
-
* Undefined color.
|
|
187
|
-
*
|
|
188
|
-
* @static
|
|
189
|
-
* @memberof ChemPalette
|
|
190
|
-
*/
|
|
191
171
|
static undefinedColor = 'rgb(100,100,100)';
|
|
192
172
|
|
|
193
|
-
|
|
194
|
-
* Create palette.
|
|
195
|
-
*
|
|
196
|
-
* @param dt
|
|
197
|
-
* @param simplified Is simplified.
|
|
198
|
-
* @param grouping Is grouping enabled.
|
|
199
|
-
* @returns
|
|
200
|
-
*/
|
|
201
|
-
static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false) {
|
|
173
|
+
static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false): StringDictionary {
|
|
202
174
|
const palette: { [key: string]: string } = {};
|
|
203
175
|
const groups = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
|
|
204
176
|
let currentGroup = 0;
|
|
@@ -211,13 +183,6 @@ export class ChemPalette {
|
|
|
211
183
|
return palette;
|
|
212
184
|
}
|
|
213
185
|
|
|
214
|
-
/**
|
|
215
|
-
* Amino acid residue names.
|
|
216
|
-
*
|
|
217
|
-
* @static
|
|
218
|
-
* @type {StringDictionary}
|
|
219
|
-
* @memberof ChemPalette
|
|
220
|
-
*/
|
|
221
186
|
static AANames: StringDictionary = {
|
|
222
187
|
'G': 'Glycine',
|
|
223
188
|
'L': 'Leucine',
|
|
@@ -239,15 +204,8 @@ export class ChemPalette {
|
|
|
239
204
|
'I': 'Isoleucine',
|
|
240
205
|
'M': 'Methionine',
|
|
241
206
|
'T': 'Threonine',
|
|
242
|
-
}
|
|
207
|
+
};
|
|
243
208
|
|
|
244
|
-
/**
|
|
245
|
-
* Amino acid residue SMILES.
|
|
246
|
-
*
|
|
247
|
-
* @static
|
|
248
|
-
* @type {StringDictionary}
|
|
249
|
-
* @memberof ChemPalette
|
|
250
|
-
*/
|
|
251
209
|
static AASmiles: StringDictionary = {
|
|
252
210
|
'G': 'NCC(=O)O',
|
|
253
211
|
'L': 'N[C@H](CC(C)C)C(=O)O',
|
|
@@ -269,15 +227,8 @@ export class ChemPalette {
|
|
|
269
227
|
'I': 'N[C@H]([C@H](C)CC)C(=O)O',
|
|
270
228
|
'M': 'NC(CCSC)C(=O)O',
|
|
271
229
|
'T': 'NC(C(O)C)C(=O)O',
|
|
272
|
-
}
|
|
230
|
+
};
|
|
273
231
|
|
|
274
|
-
/**
|
|
275
|
-
* Amino acid residue truncated SMILES.
|
|
276
|
-
*
|
|
277
|
-
* @static
|
|
278
|
-
* @type {StringDictionary}
|
|
279
|
-
* @memberof ChemPalette
|
|
280
|
-
*/
|
|
281
232
|
static AASmilesTruncated: StringDictionary = {
|
|
282
233
|
'G': '*C*',
|
|
283
234
|
'L': 'CC(C)C[C@H](*)*',
|
|
@@ -299,15 +250,8 @@ export class ChemPalette {
|
|
|
299
250
|
'I': 'CC[C@H](C)[C@H](*)*',
|
|
300
251
|
'M': 'CSCCC(*)*',
|
|
301
252
|
'T': 'CC(O)C(*)*',
|
|
302
|
-
}
|
|
253
|
+
};
|
|
303
254
|
|
|
304
|
-
/**
|
|
305
|
-
* Amino acid residue full names.
|
|
306
|
-
*
|
|
307
|
-
* @static
|
|
308
|
-
* @type {StringDictionary}
|
|
309
|
-
* @memberof ChemPalette
|
|
310
|
-
*/
|
|
311
255
|
static AAFullNames: StringDictionary = {
|
|
312
256
|
'Ala': 'A',
|
|
313
257
|
'Arg': 'R',
|
|
@@ -329,25 +273,18 @@ export class ChemPalette {
|
|
|
329
273
|
'Trp': 'W',
|
|
330
274
|
'Tyr': 'Y',
|
|
331
275
|
'Val': 'V',
|
|
332
|
-
}
|
|
276
|
+
};
|
|
333
277
|
|
|
334
|
-
|
|
335
|
-
* Get Datagrok palette.
|
|
336
|
-
*
|
|
337
|
-
* @param grouping Is grouping enabled?
|
|
338
|
-
* @returns
|
|
339
|
-
*/
|
|
340
|
-
static getDatagrok(grouping = false) {
|
|
278
|
+
/**
 * Builds the palette from `grokGroups`.
 *
 * @param {boolean} [grouping=false] forwarded to `makePalette` as its `grouping` argument
 * @return {StringDictionary} the palette produced by `makePalette`
 */
static getDatagrok(grouping = false): StringDictionary {
  const simplified = false;
  return ChemPalette.makePalette(ChemPalette.grokGroups, simplified, grouping);
}
|
|
343
281
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
return ChemPalette.makePalette(ChemPalette.leskGroups);
|
|
282
|
+
/**
 * Returns the palette for the given scheme name.
 *
 * @param {'grok'} scheme scheme name; only `grok` is handled
 * @return {StringDictionary} the palette for the scheme
 * @throws {Error} when the scheme is not a known one
 */
static getPalette(scheme: 'grok'): StringDictionary {
  if (scheme === 'grok')
    return ChemPalette.getDatagrok();

  throw new Error(`ChemPalette: scheme \`${scheme}\` does not exist`);
}
|
|
353
290
|
}
|
|
@@ -7,6 +7,8 @@ import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encod
|
|
|
7
7
|
//@ts-ignore
|
|
8
8
|
import {SEMTYPE} from '../semantics';
|
|
9
9
|
|
|
10
|
+
// let CLI: any = undefined;
|
|
11
|
+
|
|
10
12
|
/**
|
|
11
13
|
* Converts array of sequences into simple fasta string.
|
|
12
14
|
*
|
|
@@ -73,16 +75,45 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
73
75
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
74
76
|
|
|
75
77
|
const fasta = _stringsToFasta(sequences);
|
|
78
|
+
const CLI = await new Aioli({
|
|
79
|
+
tool: 'kalign',
|
|
80
|
+
version: '3.3.1',
|
|
81
|
+
reinit: true,
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
// if (!CLI) {
|
|
85
|
+
// CLI = await new Aioli('kalign/3.3.1');
|
|
86
|
+
// console.info('kalign CLI was first initialized.');
|
|
87
|
+
// } else
|
|
88
|
+
// console.info('Initialized kalign CLI was reused.');
|
|
89
|
+
|
|
90
|
+
console.log(['fasta.length =', fasta.length]);
|
|
76
91
|
|
|
77
|
-
const CLI = await new Aioli('kalign/3.3.1');
|
|
78
92
|
await CLI.fs.writeFile('input.fa', fasta);
|
|
79
|
-
const output = await CLI.exec(
|
|
93
|
+
const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
|
|
80
94
|
const buf = await CLI.cat('result.fasta');
|
|
81
95
|
|
|
82
96
|
console.warn(output);
|
|
83
97
|
|
|
98
|
+
// if (!buf)
|
|
99
|
+
// console.warn(buf);
|
|
100
|
+
|
|
84
101
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
85
102
|
const alignedCol = DG.Column.fromStrings(`(${col.name})msa`, _stringsToAligned(aligned));
|
|
86
103
|
alignedCol.semType = SEMTYPE.ALIGNED;
|
|
87
104
|
return alignedCol;
|
|
88
105
|
}
|
|
106
|
+
|
|
107
|
+
/**
 * Diagnostic helper: runs `runKalign` on growing prefixes of the column and logs, for every step,
 * whether the alignment succeeded or failed. Failures are logged, not rethrown.
 *
 * The prefix grows by 1/100 of the column length per step; the prefix size is `Math.round(i)`.
 *
 * @param {DG.Column} col column with the sequences to align
 */
export async function testMSAEnoughMemory(col: DG.Column) {
  const sequencesCount = col.length;
  const delta = sequencesCount/100;
  // The column contents do not change between steps, so materialise the list once
  // instead of on each of the ~100 iterations.
  const allSequences = col.toList();

  for (let i = delta; i < sequencesCount; i += delta) {
    try {
      await runKalign(DG.Column.fromStrings(col.name, allSequences.slice(0, Math.round(i))));
      console.log(`runKalign succeeded on ${i}`);
    } catch (error) {
      console.log(`runKalign failed on ${i} with '${error}'`);
    }
  }
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
|
|
5
|
+
|
|
6
|
+
/**
 * Adds a scaled activity column and the position-wise encoded sequence columns to `currentDf`,
 * then calls the `MultivariateAnalysis` function on them and logs its result.
 *
 * @param {DG.Grid} tableGrid not used by this function
 * @param {DG.View} view not used by this function
 * @param {DG.DataFrame} currentDf data frame that receives the new columns and is analysed
 * @param {{[name: string]: string}} options reads `activityColumnName` and `scaling`
 * @param {DG.Column} sequencesCol column with the sequences to encode
 */
export async function callMVA(
  tableGrid: DG.Grid,
  view: DG.View,
  currentDf: DG.DataFrame,
  options: {[name: string]: string},
  sequencesCol: DG.Column,
) {
  const sourceActivity = currentDf.getCol(options['activityColumnName']);
  const scaledActivity = await _scaleColumn(sourceActivity, options['scaling']);
  const encodedDf = _encodeSequences(sequencesCol);
  const scaledColName = `${options['activityColumnName']}scaled`;

  // The scaled values are copied into a fresh column before being added to the frame.
  const scaledCopy = DG.Column.fromList('double', scaledColName, scaledActivity.toList());
  _insertColumns(currentDf, [scaledCopy]);
  _insertColumns(currentDf, encodedDf.columns);

  const analysisArgs = {
    table: currentDf,
    features: encodedDf.columns.names(),
    prediction: scaledColName,
    components: 10,
    showScores: true,
    showRegresCoefs: true,
  };
  const res = await grok.functions.call('MultivariateAnalysis', analysisArgs);
  console.log(res);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Encodes every position of the cleaned sequences with the `WimleyWhite` scale.
 *
 * The number of positions is taken from the first (cleaned) sequence of the column.
 *
 * @param {DG.Column} sequencesCol Column containing the sequences.
 * @return {DG.DataFrame} The data frame with sequences encoded: one float column per position,
 * named `1`, `2`, ...
 */
function _encodeSequences(sequencesCol: DG.Column): DG.DataFrame {
  const rowCount = sequencesCol.length;
  const positionCount = AlignedSequenceEncoder.clean(sequencesCol.get(0)).length;
  const encoder = new AlignedSequenceEncoder('WimleyWhite');
  // One buffer per position, each holding a value for every row.
  const encoded = Array.from({length: positionCount}, () => new Float32Array(rowCount));

  for (let row = 0; row < rowCount; ++row) {
    const cleaned = AlignedSequenceEncoder.clean(sequencesCol.get(row));
    encoded.forEach((values, position) => {
      values[row] = encoder.encodeLettter(cleaned[position]);
    });
  }

  const positionColumns = encoded.map(
    (values, position) => DG.Column.fromFloat32Array(`${position + 1}`, values),
  );
  return DG.DataFrame.fromColumns(positionColumns);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Scales a column according to the given method.
 *
 * `none` returns the column itself. Any other method applies the formula `Log10(${<column name>})`,
 * prefixed with `0-` when the method name starts with `-`.
 *
 * @param {DG.Column} column column to scale
 * @param {string} method scaling method name
 * @return {Promise<DG.Column>} the original column for `none`, otherwise the formula result
 * @throws {Error} when applying the formula yields null
 */
async function _scaleColumn(column: DG.Column, method: string): Promise<DG.Column> {
  if (method == 'none')
    return column;

  const sign = method.startsWith('-') ? '0-' : '';
  const scaled = await column.applyFormula(`${sign}Log10(\${${column.name}})`);
  if (scaled == null)
    throw new Error('Column formula returned unexpected null.');

  return scaled;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Adds each of the given columns to the target data frame.
 *
 * @param {DG.DataFrame} targetDf data frame that receives the columns
 * @param {DG.Column[]} columns columns to add, in iteration order
 * @return {DG.DataFrame} the same `targetDf` instance
 */
function _insertColumns(targetDf: DG.DataFrame, columns: DG.Column[]): DG.DataFrame {
  for (const column of columns) {
    targetDf.columns.add(column);
  }
  return targetDf;
}
|
|
77
|
+
|
|
@@ -52,40 +52,30 @@ export function cleanAlignedSequencesColumn(col: DG.Column): Array<string> {
|
|
|
52
52
|
* @return {Promise<DG.ScatterPlotViewer>} A viewer.
|
|
53
53
|
*/
|
|
54
54
|
export async function createPeptideSimilaritySpaceViewer(
|
|
55
|
-
table: DG.DataFrame,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
measure: string,
|
|
59
|
-
cyclesCount: number,
|
|
60
|
-
view: DG.TableView | null,
|
|
61
|
-
activityColumnName?: string | null,
|
|
62
|
-
): Promise<DG.ScatterPlotViewer> {
|
|
63
|
-
const pi = DG.TaskBarProgressIndicator.create('Creating embedding.');
|
|
55
|
+
table: DG.DataFrame, alignedSequencesColumn: DG.Column, method: string, measure: string, cyclesCount: number,
|
|
56
|
+
view: DG.TableView | null, activityColumnName?: string | null): Promise<DG.ScatterPlotViewer> {
|
|
57
|
+
const pi = DG.TaskBarProgressIndicator.create('Creating embedding...');
|
|
64
58
|
|
|
65
59
|
activityColumnName = activityColumnName ?? inferActivityColumnsName(table);
|
|
66
60
|
|
|
67
61
|
const axesNames = ['~X', '~Y', '~MW'];
|
|
68
62
|
const columnData = alignedSequencesColumn.toList().map((v, _) => AlignedSequenceEncoder.clean(v));
|
|
69
63
|
|
|
70
|
-
const embcols = await createDimensinalityReducingWorker(
|
|
64
|
+
const embcols = await createDimensinalityReducingWorker(
|
|
65
|
+
{data: columnData, metric: measure as StringMetrics}, method, cyclesCount);
|
|
71
66
|
|
|
72
67
|
const columns = Array.from(
|
|
73
|
-
embcols as Coordinates,
|
|
74
|
-
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
for (let i = 0; i < sequences.length; ++i) {
|
|
82
|
-
currentSequence = sequences[i];
|
|
83
|
-
mw[i] = currentSequence == null ? 0 : getSequenceMolecularWeight(currentSequence);
|
|
84
|
-
}
|
|
68
|
+
embcols as Coordinates, (v: Float32Array, k) => DG.Column.fromFloat32Array(axesNames[k], v));
|
|
69
|
+
|
|
70
|
+
/**
 * Computes the molecular weight of every sequence.
 *
 * Missing (null/undefined) entries are passed to `getSequenceMolecularWeight` as an empty string.
 *
 * @param {string[]} sequences sequences to weigh
 * @return {Float32Array} one weight per input sequence, in input order
 */
function _getMW(sequences: string[]) {
  // `TypedArray.prototype.map` returns a NEW array, so the previous `mw.map(...)` call whose result
  // was discarded left `mw` filled with zeros. Build the result directly from the mapped values.
  return Float32Array.from(sequences, (sequence) => getSequenceMolecularWeight(sequence ?? ''));
}
|
|
87
77
|
|
|
88
|
-
columns.push(DG.Column.fromFloat32Array('~MW', _getMW()));
|
|
78
|
+
columns.push(DG.Column.fromFloat32Array('~MW', _getMW(columnData)));
|
|
89
79
|
|
|
90
80
|
const edf = DG.DataFrame.fromColumns(columns);
|
|
91
81
|
|
|
@@ -103,11 +93,11 @@ export async function createPeptideSimilaritySpaceViewer(
|
|
|
103
93
|
table.columns.insert(newCol);
|
|
104
94
|
}
|
|
105
95
|
|
|
106
|
-
const viewerOptions = {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
96
|
+
const viewerOptions = {
|
|
97
|
+
x: '~X', y: '~Y', color: activityColumnName ?? '~MW', size: '~MW', title: 'Peptide Space', showYSelector: false,
|
|
98
|
+
showXSelector: false, showColorSelector: false, showSizeSelector: false,
|
|
99
|
+
};
|
|
100
|
+
const viewer = table.plot.scatter(viewerOptions);
|
|
111
101
|
|
|
112
102
|
pi.close();
|
|
113
103
|
return viewer;
|
|
@@ -4,8 +4,9 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
import $ from 'cash-dom';
|
|
5
5
|
|
|
6
6
|
import * as logojs from 'logojs-react';
|
|
7
|
-
import {splitAlignedPeptides} from '../utils/split-aligned';
|
|
7
|
+
// import {splitAlignedPeptides} from '../utils/split-aligned';
|
|
8
8
|
import {ChemPalette} from '../utils/chem-palette';
|
|
9
|
+
import {PeptidesController} from '../peptides';
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
12
|
* Logo viewer.
|
|
@@ -80,7 +81,7 @@ export class Logo extends DG.JsViewer {
|
|
|
80
81
|
this.initialized = true;
|
|
81
82
|
console.log('INIT');
|
|
82
83
|
this.target = this.dataFrame;
|
|
83
|
-
[this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
84
|
+
[this.splitted] = PeptidesController.splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
84
85
|
this.root.style.width = 'auto';
|
|
85
86
|
this.root.style.height = 'auto';
|
|
86
87
|
this.root.style.maxHeight = '200px';
|
|
@@ -142,8 +143,8 @@ export class Logo extends DG.JsViewer {
|
|
|
142
143
|
.aggregate();
|
|
143
144
|
}
|
|
144
145
|
if (selected)
|
|
145
|
-
[this.splitted] = splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
146
|
-
else [this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
146
|
+
[this.splitted] = PeptidesController.splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
147
|
+
else [this.splitted] = PeptidesController.splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
147
148
|
$(this.root).empty();
|
|
148
149
|
|
|
149
150
|
if (typeof this.dataFrame !== 'undefined')
|