@datagrok/peptides 0.8.5 → 0.8.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +12 -2
- package/files/aligned.csv +648 -3
- package/files/aligned_2.csv +10275 -3
- package/package.json +12 -14
- package/setup.cmd +18 -0
- package/setup.sh +15 -0
- package/src/describe.ts +131 -136
- package/src/model.ts +98 -76
- package/src/monomer-library.ts +184 -0
- package/src/package-test.ts +4 -3
- package/src/package.ts +76 -23
- package/src/peptides.ts +223 -108
- package/src/tests/msa-tests.ts +27 -0
- package/src/tests/peptide-space-test.ts +46 -9
- package/src/tests/peptides-tests.ts +58 -21
- package/src/tests/test-data.ts +649 -0
- package/src/tests/utils.ts +56 -16
- package/src/utils/cell-renderer.ts +211 -58
- package/src/utils/chem-palette.ts +86 -45
- package/src/utils/molecular-measure.ts +3 -4
- package/src/utils/multiple-sequence-alignment.ts +3 -4
- package/src/utils/peptide-similarity-space.ts +44 -37
- package/src/utils/split-aligned.ts +58 -58
- package/src/viewers/logo-viewer.ts +14 -15
- package/src/viewers/sar-viewer.ts +101 -124
- package/src/viewers/stacked-barchart-viewer.ts +360 -365
- package/src/viewers/subst-viewer.ts +115 -71
- package/src/widgets/analyze-peptides.ts +31 -31
- package/src/widgets/manual-alignment.ts +12 -7
- package/src/widgets/multiple-sequence-alignment.ts +9 -0
- package/src/widgets/peptide-molecule.ts +9 -8
- package/src/widgets/subst-table.ts +65 -0
- package/src/workers/dimensionality-reducer.ts +2 -1
- package/tsconfig.json +1 -1
|
@@ -2,6 +2,10 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
+
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
|
+
import {_package} from '../package';
|
|
7
|
+
import {MonomerLibrary} from '../monomer-library';
|
|
8
|
+
|
|
5
9
|
/**
|
|
6
10
|
* Chem palette class.
|
|
7
11
|
*
|
|
@@ -9,8 +13,9 @@ import * as DG from 'datagrok-api/dg';
|
|
|
9
13
|
* @class ChemPalette
|
|
10
14
|
*/
|
|
11
15
|
export class ChemPalette {
|
|
12
|
-
cp:
|
|
13
|
-
|
|
16
|
+
cp: StringDictionary = {};
|
|
17
|
+
isInit: boolean = false;
|
|
18
|
+
monomerLib: MonomerLibrary | null = null;
|
|
14
19
|
/**
|
|
15
20
|
* Creates an instance of ChemPalette.
|
|
16
21
|
*
|
|
@@ -19,9 +24,8 @@ export class ChemPalette {
|
|
|
19
24
|
* @memberof ChemPalette
|
|
20
25
|
*/
|
|
21
26
|
constructor(scheme: string, grouping = false) {
|
|
22
|
-
if (scheme == 'grok')
|
|
27
|
+
if (scheme == 'grok')
|
|
23
28
|
this.cp = ChemPalette.getDatagrok(grouping);
|
|
24
|
-
}
|
|
25
29
|
}
|
|
26
30
|
|
|
27
31
|
/**
|
|
@@ -31,24 +35,32 @@ export class ChemPalette {
|
|
|
31
35
|
* @param {number} x x coordinate of the mouse pointer.
|
|
32
36
|
* @param {number} y y coordinate of the mouse pointer.
|
|
33
37
|
*/
|
|
34
|
-
showTooltip(cell: DG.GridCell, x: number, y: number) {
|
|
38
|
+
async showTooltip(cell: DG.GridCell, x: number, y: number) {
|
|
39
|
+
if (!this.isInit)
|
|
40
|
+
this.monomerLib = new MonomerLibrary(await _package.files.readAsText(`HELMMonomers_June10.sdf`));
|
|
41
|
+
|
|
35
42
|
const s = cell.cell.value as string;
|
|
36
43
|
let toDisplay = [ui.divText(s)];
|
|
37
|
-
const [,
|
|
38
|
-
|
|
39
|
-
if (
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
+
const [, aarOuter, aarInner] = this.getColorAAPivot(s);
|
|
45
|
+
for (const aar of [aarOuter, aarInner]) {
|
|
46
|
+
if (this.monomerLib!.monomerNames.includes(aar)) {
|
|
47
|
+
if (aar in ChemPalette.AANames)
|
|
48
|
+
toDisplay = [ui.divText(ChemPalette.AANames[aar])];
|
|
49
|
+
|
|
50
|
+
if (aar in ChemPalette.AAFullNames)
|
|
51
|
+
toDisplay = [ui.divText(ChemPalette.AANames[ChemPalette.AAFullNames[aar]])];
|
|
52
|
+
|
|
53
|
+
const options = {
|
|
54
|
+
autoCrop: true,
|
|
55
|
+
autoCropMargin: 0,
|
|
56
|
+
suppressChiralText: true,
|
|
57
|
+
};
|
|
58
|
+
const sketch = grok.chem.svgMol(this.monomerLib!.getMonomerMol(aar), undefined, undefined, options);
|
|
59
|
+
if (toDisplay.length == 2)
|
|
60
|
+
toDisplay.push(ui.divText('Modified'));
|
|
61
|
+
|
|
62
|
+
toDisplay.push(sketch);
|
|
44
63
|
}
|
|
45
|
-
const options = {
|
|
46
|
-
autoCrop: true,
|
|
47
|
-
autoCropMargin: 0,
|
|
48
|
-
suppressChiralText: true,
|
|
49
|
-
};
|
|
50
|
-
const sketch = grok.chem.svgMol(ChemPalette.AASmiles[aar], undefined, undefined, options);
|
|
51
|
-
toDisplay.push(sketch);
|
|
52
64
|
}
|
|
53
65
|
ui.tooltip.show(ui.divV(toDisplay), x, y);
|
|
54
66
|
}
|
|
@@ -63,25 +75,54 @@ export class ChemPalette {
|
|
|
63
75
|
return color;
|
|
64
76
|
}
|
|
65
77
|
|
|
78
|
+
/**
|
|
79
|
+
* Returns the amino acid split into its outer part and the content inside the parentheses; if that content is a number, it is omitted
|
|
80
|
+
*
|
|
81
|
+
* @param {string} c raw amino
|
|
82
|
+
* @return {[string, string]} outer and inner content
|
|
83
|
+
*/
|
|
84
|
+
private getInnerOuter(c: string): [string, string] {
|
|
85
|
+
let isInner = 0;
|
|
86
|
+
let inner = '';
|
|
87
|
+
let outer = '';
|
|
88
|
+
|
|
89
|
+
for (let i = 0; i < c.length; ++i) {
|
|
90
|
+
if (c[i] == '(')
|
|
91
|
+
isInner++;
|
|
92
|
+
else if (c[i] == ')')
|
|
93
|
+
isInner--;
|
|
94
|
+
else if (isInner)
|
|
95
|
+
inner += c[i];
|
|
96
|
+
else
|
|
97
|
+
outer += c[i];
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return !isNaN(parseInt(inner)) ? [outer, ''] : [outer, inner];
|
|
101
|
+
}
|
|
102
|
+
|
|
66
103
|
/**
|
|
67
104
|
* Get color for the provided amino acid residue pivot
|
|
68
105
|
* @param {string} [c=''] Amino acid residue string.
|
|
69
106
|
* @return {[string, string, number]}
|
|
70
107
|
*/
|
|
71
|
-
getColorAAPivot(c: string = ''): [string, string, number] {
|
|
108
|
+
getColorAAPivot(c: string = ''): [string, string, string, number] {
|
|
109
|
+
let [outerC, innerC] = this.getInnerOuter(c);
|
|
110
|
+
outerC = (outerC.length > 6 ? outerC.slice(0, 3) + '...' : outerC);
|
|
111
|
+
innerC = (innerC.length > 6 ? innerC.slice(0, 3) + '...' : innerC);
|
|
112
|
+
|
|
72
113
|
if (c.length == 1 || c[1] == '(') {
|
|
73
114
|
const amino = c[0]?.toUpperCase()!;
|
|
74
115
|
return amino in this.cp?
|
|
75
|
-
[this.cp[amino], amino, 1]:
|
|
76
|
-
[ChemPalette.undefinedColor,
|
|
116
|
+
[this.cp[amino], amino, innerC, 1]:
|
|
117
|
+
[ChemPalette.undefinedColor, outerC, innerC, 1];
|
|
77
118
|
}
|
|
78
119
|
|
|
79
120
|
if (c[0] == 'd' && c[1]! in this.cp) {
|
|
80
121
|
if (c.length == 2 || c[2] == '(') {
|
|
81
122
|
const amino = c[1]?.toUpperCase()!;
|
|
82
123
|
return amino in this.cp?
|
|
83
|
-
[this.cp[amino], amino, 2]:
|
|
84
|
-
[ChemPalette.undefinedColor,
|
|
124
|
+
[this.cp[amino], amino, innerC, 2]:
|
|
125
|
+
[ChemPalette.undefinedColor, outerC, innerC, 2];
|
|
85
126
|
}
|
|
86
127
|
}
|
|
87
128
|
|
|
@@ -89,8 +130,8 @@ export class ChemPalette {
|
|
|
89
130
|
if (c.length == 3 || c[3] == '(') {
|
|
90
131
|
const amino = ChemPalette.AAFullNames[c.substr(0, 3)];
|
|
91
132
|
return amino in this.cp?
|
|
92
|
-
[this.cp[amino], amino, 3]:
|
|
93
|
-
[ChemPalette.undefinedColor,
|
|
133
|
+
[this.cp[amino], amino, innerC, 3]:
|
|
134
|
+
[ChemPalette.undefinedColor, outerC, innerC, 3];
|
|
94
135
|
}
|
|
95
136
|
}
|
|
96
137
|
|
|
@@ -99,13 +140,13 @@ export class ChemPalette {
|
|
|
99
140
|
if (c.length == 4 || c[4] == '(') {
|
|
100
141
|
const amino = ChemPalette.AAFullNames[c.substr(1, 3)];
|
|
101
142
|
return amino in this.cp?
|
|
102
|
-
[this.cp[amino], amino, 4]:
|
|
103
|
-
[ChemPalette.undefinedColor,
|
|
143
|
+
[this.cp[amino], amino, innerC, 4]:
|
|
144
|
+
[ChemPalette.undefinedColor, outerC, innerC, 4];
|
|
104
145
|
}
|
|
105
146
|
}
|
|
106
147
|
}
|
|
107
148
|
|
|
108
|
-
return [ChemPalette.undefinedColor,
|
|
149
|
+
return [ChemPalette.undefinedColor, outerC, innerC, 0];
|
|
109
150
|
}
|
|
110
151
|
|
|
111
152
|
/**
|
|
@@ -116,7 +157,7 @@ export class ChemPalette {
|
|
|
116
157
|
*/
|
|
117
158
|
getColorPivot(c = ''): [string, number] {
|
|
118
159
|
//TODO: merge with getColorAAPivot?
|
|
119
|
-
const [color
|
|
160
|
+
const [color,,, pivot] = this.getColorAAPivot(c);
|
|
120
161
|
return [color, pivot];
|
|
121
162
|
};
|
|
122
163
|
|
|
@@ -148,7 +189,7 @@ export class ChemPalette {
|
|
|
148
189
|
'gray': ['rgb(127,127,127)', 'rgb(199,199,199)', 'rgb(196,156,148)', 'rgb(222, 222, 180)'],
|
|
149
190
|
'yellow': ['rgb(188,189,34)'],
|
|
150
191
|
'white': ['rgb(230,230,230)'],
|
|
151
|
-
}
|
|
192
|
+
};
|
|
152
193
|
|
|
153
194
|
/**
|
|
154
195
|
* Grok color scheme groups.
|
|
@@ -164,7 +205,7 @@ export class ChemPalette {
|
|
|
164
205
|
'light_blue': ['R', 'H', 'K'],
|
|
165
206
|
'dark_blue': ['D', 'E'],
|
|
166
207
|
'orange': ['S', 'T', 'N', 'Q'],
|
|
167
|
-
}
|
|
208
|
+
};
|
|
168
209
|
|
|
169
210
|
/**
|
|
170
211
|
* Lesk color scheme groups.
|
|
@@ -179,7 +220,7 @@ export class ChemPalette {
|
|
|
179
220
|
'magenta': ['N', 'Q', 'H'],
|
|
180
221
|
'red': ['D', 'E'],
|
|
181
222
|
'all_blue': ['K', 'R'],
|
|
182
|
-
}
|
|
223
|
+
};
|
|
183
224
|
|
|
184
225
|
/**
|
|
185
226
|
* Undefined color.
|
|
@@ -214,10 +255,10 @@ export class ChemPalette {
|
|
|
214
255
|
* Amino acid residue names.
|
|
215
256
|
*
|
|
216
257
|
* @static
|
|
217
|
-
* @type {
|
|
258
|
+
* @type {StringDictionary}
|
|
218
259
|
* @memberof ChemPalette
|
|
219
260
|
*/
|
|
220
|
-
static AANames:
|
|
261
|
+
static AANames: StringDictionary = {
|
|
221
262
|
'G': 'Glycine',
|
|
222
263
|
'L': 'Leucine',
|
|
223
264
|
'Y': 'Tyrosine',
|
|
@@ -238,16 +279,16 @@ export class ChemPalette {
|
|
|
238
279
|
'I': 'Isoleucine',
|
|
239
280
|
'M': 'Methionine',
|
|
240
281
|
'T': 'Threonine',
|
|
241
|
-
}
|
|
282
|
+
};
|
|
242
283
|
|
|
243
284
|
/**
|
|
244
285
|
* Amino acid residue SMILES.
|
|
245
286
|
*
|
|
246
287
|
* @static
|
|
247
|
-
* @type {
|
|
288
|
+
* @type {StringDictionary}
|
|
248
289
|
* @memberof ChemPalette
|
|
249
290
|
*/
|
|
250
|
-
static AASmiles:
|
|
291
|
+
static AASmiles: StringDictionary = {
|
|
251
292
|
'G': 'NCC(=O)O',
|
|
252
293
|
'L': 'N[C@H](CC(C)C)C(=O)O',
|
|
253
294
|
'Y': 'NC(CC1=CC=C(O)C=C1)C(=O)O',
|
|
@@ -268,16 +309,16 @@ export class ChemPalette {
|
|
|
268
309
|
'I': 'N[C@H]([C@H](C)CC)C(=O)O',
|
|
269
310
|
'M': 'NC(CCSC)C(=O)O',
|
|
270
311
|
'T': 'NC(C(O)C)C(=O)O',
|
|
271
|
-
}
|
|
312
|
+
};
|
|
272
313
|
|
|
273
314
|
/**
|
|
274
315
|
* Amino acid residue truncated SMILES.
|
|
275
316
|
*
|
|
276
317
|
* @static
|
|
277
|
-
* @type {
|
|
318
|
+
* @type {StringDictionary}
|
|
278
319
|
* @memberof ChemPalette
|
|
279
320
|
*/
|
|
280
|
-
static AASmilesTruncated:
|
|
321
|
+
static AASmilesTruncated: StringDictionary = {
|
|
281
322
|
'G': '*C*',
|
|
282
323
|
'L': 'CC(C)C[C@H](*)*',
|
|
283
324
|
'Y': 'C1=CC(=CC=C1CC(*)*)O',
|
|
@@ -298,16 +339,16 @@ export class ChemPalette {
|
|
|
298
339
|
'I': 'CC[C@H](C)[C@H](*)*',
|
|
299
340
|
'M': 'CSCCC(*)*',
|
|
300
341
|
'T': 'CC(O)C(*)*',
|
|
301
|
-
}
|
|
342
|
+
};
|
|
302
343
|
|
|
303
344
|
/**
|
|
304
345
|
* Amino acid residue full names.
|
|
305
346
|
*
|
|
306
347
|
* @static
|
|
307
|
-
* @type {
|
|
348
|
+
* @type {StringDictionary}
|
|
308
349
|
* @memberof ChemPalette
|
|
309
350
|
*/
|
|
310
|
-
static AAFullNames:
|
|
351
|
+
static AAFullNames: StringDictionary = {
|
|
311
352
|
'Ala': 'A',
|
|
312
353
|
'Arg': 'R',
|
|
313
354
|
'Asn': 'N',
|
|
@@ -328,7 +369,7 @@ export class ChemPalette {
|
|
|
328
369
|
'Trp': 'W',
|
|
329
370
|
'Tyr': 'Y',
|
|
330
371
|
'Val': 'V',
|
|
331
|
-
}
|
|
372
|
+
};
|
|
332
373
|
|
|
333
374
|
/**
|
|
334
375
|
* Get Datagrok palette.
|
|
@@ -142,9 +142,9 @@ const _lib = [
|
|
|
142
142
|
const weightsLib : {[name: string]: number} = {};
|
|
143
143
|
|
|
144
144
|
// Create a dictionary linking one-letter code with the corresponding residues weight.
|
|
145
|
-
for (const d of _lib)
|
|
145
|
+
for (const d of _lib)
|
|
146
146
|
weightsLib[d['One-letter']] = parseFloat(d.Weight.substring(0, d.Weight.length-2));
|
|
147
|
-
|
|
147
|
+
|
|
148
148
|
|
|
149
149
|
/**
|
|
150
150
|
* Calculates molecular weight of the given peptide in daltons.
|
|
@@ -167,9 +167,8 @@ export function getSequenceMolecularWeight(sequence: string): number {
|
|
|
167
167
|
}
|
|
168
168
|
|
|
169
169
|
for (const i of sequence) {
|
|
170
|
-
if (i in weightsLib)
|
|
170
|
+
if (i in weightsLib)
|
|
171
171
|
sum += weightsLib[i];
|
|
172
|
-
}
|
|
173
172
|
}
|
|
174
173
|
return sum;
|
|
175
174
|
}
|
|
@@ -53,9 +53,9 @@ function _stringsToAligned(alignment: string[]): string[] {
|
|
|
53
53
|
const nItems = alignment.length;
|
|
54
54
|
const aligned = new Array<string>(nItems);
|
|
55
55
|
|
|
56
|
-
for (let i = 0; i < nItems; ++i)
|
|
56
|
+
for (let i = 0; i < nItems; ++i)
|
|
57
57
|
aligned[i] = _castAligned(alignment[i]);
|
|
58
|
-
|
|
58
|
+
|
|
59
59
|
return aligned;
|
|
60
60
|
}
|
|
61
61
|
|
|
@@ -69,9 +69,8 @@ function _stringsToAligned(alignment: string[]): string[] {
|
|
|
69
69
|
export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.Column> {
|
|
70
70
|
let sequences = col.toList();
|
|
71
71
|
|
|
72
|
-
if (isAligned)
|
|
72
|
+
if (isAligned)
|
|
73
73
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
74
|
-
}
|
|
75
74
|
|
|
76
75
|
const fasta = _stringsToFasta(sequences);
|
|
77
76
|
|
|
@@ -8,7 +8,7 @@ import {DimensionalityReducer} from '@datagrok-libraries/ml/src/reduce-dimension
|
|
|
8
8
|
import {
|
|
9
9
|
createDimensinalityReducingWorker,
|
|
10
10
|
} from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
11
|
-
import {
|
|
11
|
+
import {Measure, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
12
12
|
import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
|
|
13
13
|
|
|
14
14
|
/**
|
|
@@ -20,9 +20,8 @@ import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
|
|
|
20
20
|
function inferActivityColumnsName(table: DG.DataFrame): string | null {
|
|
21
21
|
const re = /activity|ic50/i;
|
|
22
22
|
for (const name of table.columns.names()) {
|
|
23
|
-
if (name.match(re))
|
|
23
|
+
if (name.match(re))
|
|
24
24
|
return name;
|
|
25
|
-
}
|
|
26
25
|
}
|
|
27
26
|
return null;
|
|
28
27
|
}
|
|
@@ -53,40 +52,30 @@ export function cleanAlignedSequencesColumn(col: DG.Column): Array<string> {
|
|
|
53
52
|
* @return {Promise<DG.ScatterPlotViewer>} A viewer.
|
|
54
53
|
*/
|
|
55
54
|
export async function createPeptideSimilaritySpaceViewer(
|
|
56
|
-
table: DG.DataFrame,
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
measure: string,
|
|
60
|
-
cyclesCount: number,
|
|
61
|
-
view: DG.TableView | null,
|
|
62
|
-
activityColumnName?: string | null,
|
|
63
|
-
): Promise<DG.ScatterPlotViewer> {
|
|
64
|
-
const pi = DG.TaskBarProgressIndicator.create('Creating embedding.');
|
|
55
|
+
table: DG.DataFrame, alignedSequencesColumn: DG.Column, method: string, measure: string, cyclesCount: number,
|
|
56
|
+
view: DG.TableView | null, activityColumnName?: string | null): Promise<DG.ScatterPlotViewer> {
|
|
57
|
+
const pi = DG.TaskBarProgressIndicator.create('Creating embedding...');
|
|
65
58
|
|
|
66
59
|
activityColumnName = activityColumnName ?? inferActivityColumnsName(table);
|
|
67
60
|
|
|
68
61
|
const axesNames = ['~X', '~Y', '~MW'];
|
|
69
62
|
const columnData = alignedSequencesColumn.toList().map((v, _) => AlignedSequenceEncoder.clean(v));
|
|
70
63
|
|
|
71
|
-
const embcols = await createDimensinalityReducingWorker(
|
|
64
|
+
const embcols = await createDimensinalityReducingWorker(
|
|
65
|
+
{data: columnData, metric: measure as StringMetrics}, method, cyclesCount);
|
|
72
66
|
|
|
73
67
|
const columns = Array.from(
|
|
74
|
-
embcols as Coordinates,
|
|
75
|
-
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
for (let i = 0; i < sequences.length; ++i) {
|
|
83
|
-
currentSequence = sequences[i];
|
|
84
|
-
mw[i] = currentSequence == null ? 0 : getSequenceMolecularWeight(currentSequence);
|
|
85
|
-
}
|
|
68
|
+
embcols as Coordinates, (v: Float32Array, k) => DG.Column.fromFloat32Array(axesNames[k], v));
|
|
69
|
+
|
|
70
|
+
function _getMW(sequences: string[]) {
|
|
71
|
+
const mw: Float32Array = new Float32Array(sequences.length);
|
|
72
|
+
|
|
73
|
+
mw.map((_, index) => getSequenceMolecularWeight(sequences[index] ?? ''));
|
|
74
|
+
|
|
86
75
|
return mw;
|
|
87
76
|
}
|
|
88
77
|
|
|
89
|
-
columns.push(DG.Column.fromFloat32Array('~MW', _getMW()));
|
|
78
|
+
columns.push(DG.Column.fromFloat32Array('~MW', _getMW(columnData)));
|
|
90
79
|
|
|
91
80
|
const edf = DG.DataFrame.fromColumns(columns);
|
|
92
81
|
|
|
@@ -100,17 +89,15 @@ export async function createPeptideSimilaritySpaceViewer(
|
|
|
100
89
|
const v = newCol.get(i);
|
|
101
90
|
table.set(axis, i, v);
|
|
102
91
|
}
|
|
103
|
-
} else
|
|
92
|
+
} else
|
|
104
93
|
table.columns.insert(newCol);
|
|
105
|
-
}
|
|
106
94
|
}
|
|
107
95
|
|
|
108
|
-
const viewerOptions = {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
}
|
|
96
|
+
const viewerOptions = {
|
|
97
|
+
x: '~X', y: '~Y', color: activityColumnName ?? '~MW', size: '~MW', title: 'Peptide Space', showYSelector: false,
|
|
98
|
+
showXSelector: false, showColorSelector: false, showSizeSelector: false,
|
|
99
|
+
};
|
|
100
|
+
const viewer = table.plot.scatter(viewerOptions);
|
|
114
101
|
|
|
115
102
|
pi.close();
|
|
116
103
|
return viewer;
|
|
@@ -141,7 +128,7 @@ export class PeptideSimilaritySpaceWidget {
|
|
|
141
128
|
*/
|
|
142
129
|
constructor(alignedSequencesColumn: DG.Column, view: DG.TableView) {
|
|
143
130
|
this.availableMethods = DimensionalityReducer.availableMethods;
|
|
144
|
-
this.availableMetrics =
|
|
131
|
+
this.availableMetrics = Measure.getMetricByDataType('String');
|
|
145
132
|
this.method = this.availableMethods[0];
|
|
146
133
|
this.metrics = this.availableMetrics[0];
|
|
147
134
|
this.currentDf = alignedSequencesColumn.dataFrame;
|
|
@@ -177,9 +164,10 @@ export class PeptideSimilaritySpaceWidget {
|
|
|
177
164
|
* @memberof PeptideSimilaritySpaceWidget
|
|
178
165
|
*/
|
|
179
166
|
protected async updateViewer() {
|
|
180
|
-
this.viewer.lastChild?.remove();
|
|
181
167
|
const viewer = await this.drawViewer();
|
|
168
|
+
this.viewer.lastChild?.remove();
|
|
182
169
|
this.viewer.appendChild(viewer.root);
|
|
170
|
+
viewer.dataFrame?.fireValuesChanged();
|
|
183
171
|
}
|
|
184
172
|
|
|
185
173
|
/**
|
|
@@ -224,6 +212,25 @@ export class PeptideSimilaritySpaceWidget {
|
|
|
224
212
|
* @memberof PeptideSimilaritySpaceWidget
|
|
225
213
|
*/
|
|
226
214
|
public async draw(): Promise<DG.Widget> {
|
|
227
|
-
|
|
215
|
+
const plot = await this.drawViewer();
|
|
216
|
+
const inputs = await this.drawInputs();
|
|
217
|
+
const elements = ui.divV([plot.root, inputs]);
|
|
218
|
+
|
|
219
|
+
// When the scatter plot is detached, add it to the table view unless a '~X'/'~Y' scatter plot is already there.
|
|
220
|
+
plot.onEvent('d4-viewer-detached').subscribe((args) => {
|
|
221
|
+
let found = false;
|
|
222
|
+
|
|
223
|
+
for (const v of this.view.viewers) {
|
|
224
|
+
const opts = v.getOptions() as {[name: string]: any};
|
|
225
|
+
|
|
226
|
+
if (opts.type == 'Scatter plot' && opts.look.xColumnName == '~X' && opts.look.yColumnName == '~Y')
|
|
227
|
+
found = true;
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
if (!found)
|
|
231
|
+
this.view.addViewer(plot);
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
return new DG.Widget(elements);
|
|
228
235
|
}
|
|
229
236
|
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import * as DG from 'datagrok-api/dg';
|
|
1
|
+
// import * as DG from 'datagrok-api/dg';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Split aligned sequence string into separate parts containing amino acid residues.
|
|
@@ -8,65 +8,65 @@ import * as DG from 'datagrok-api/dg';
|
|
|
8
8
|
* @param {boolean} [filter=true] Filter out columns with all the same residues.
|
|
9
9
|
* @return {[DG.DataFrame, number[]]} DataFrame containing split sequence and a list of invalid indexes.
|
|
10
10
|
*/
|
|
11
|
-
export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
11
|
+
// export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
|
|
12
|
+
// const splitPeptidesArray: string[][] = [];
|
|
13
|
+
// let currentSplitPeptide: string[];
|
|
14
|
+
// let modeMonomerCount = 0;
|
|
15
|
+
// let currentLength;
|
|
16
|
+
// const colLength = peptideColumn.length;
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
18
|
+
// // splitting data
|
|
19
|
+
// const monomerLengths: {[index: string]: number} = {};
|
|
20
|
+
// for (let i = 0; i < colLength; i++) {
|
|
21
|
+
// currentSplitPeptide = peptideColumn.get(i).split('-').map((value: string) => value ? value : '-');
|
|
22
|
+
// splitPeptidesArray.push(currentSplitPeptide);
|
|
23
|
+
// currentLength = currentSplitPeptide.length;
|
|
24
|
+
// monomerLengths[currentLength + ''] =
|
|
25
|
+
// monomerLengths[currentLength + ''] ? monomerLengths[currentLength + ''] + 1 : 1;
|
|
26
|
+
// }
|
|
27
|
+
// //@ts-ignore: what I do here is converting string to number the most effective way I could find. parseInt is slow
|
|
28
|
+
// modeMonomerCount = 1 * Object.keys(monomerLengths).reduce((a, b) => monomerLengths[a] > monomerLengths[b] ? a : b);
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}
|
|
43
|
-
for (let j = 0; j < modeMonomerCount; j++) {
|
|
44
|
-
splitColumns[j].push(j < currentLength ? currentSplitPeptide[j] : '-');
|
|
45
|
-
}
|
|
46
|
-
splitColumns[modeMonomerCount].push(nTerminal);
|
|
47
|
-
}
|
|
48
|
-
modeMonomerCount--; // minus C-terminal
|
|
30
|
+
// // making sure all of the sequences are of the same size
|
|
31
|
+
// // and marking invalid sequences
|
|
32
|
+
// let nTerminal: string;
|
|
33
|
+
// const invalidIndexes: number[] = [];
|
|
34
|
+
// let splitColumns: string[][] = Array.from({length: modeMonomerCount}, (_) => []);
|
|
35
|
+
// modeMonomerCount--; // minus N-terminal
|
|
36
|
+
// for (let i = 0; i < colLength; i++) {
|
|
37
|
+
// currentSplitPeptide = splitPeptidesArray[i];
|
|
38
|
+
// nTerminal = currentSplitPeptide.pop()!; // it is guaranteed that there will be at least one element
|
|
39
|
+
// currentLength = currentSplitPeptide.length;
|
|
40
|
+
// if (currentLength !== modeMonomerCount)
|
|
41
|
+
// invalidIndexes.push(i);
|
|
49
42
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
columnNames.splice(0, 0, 'N-terminal');
|
|
53
|
-
columnNames.push('C-terminal');
|
|
43
|
+
// for (let j = 0; j < modeMonomerCount; j++)
|
|
44
|
+
// splitColumns[j].push(j < currentLength ? currentSplitPeptide[j] : '-');
|
|
54
45
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
const isRetained = new Set(positionArray).size > 1;
|
|
59
|
-
if (!isRetained) {
|
|
60
|
-
columnNames.splice(index, 1);
|
|
61
|
-
}
|
|
62
|
-
return isRetained;
|
|
63
|
-
});
|
|
64
|
-
}
|
|
46
|
+
// splitColumns[modeMonomerCount].push(nTerminal);
|
|
47
|
+
// }
|
|
48
|
+
// modeMonomerCount--; // minus C-terminal
|
|
65
49
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
50
|
+
// //create column names list
|
|
51
|
+
// const columnNames = Array.from({length: modeMonomerCount}, (_, index) => `${index + 1 < 10 ? 0 : ''}${index + 1 }`);
|
|
52
|
+
// columnNames.splice(0, 0, 'N-terminal');
|
|
53
|
+
// columnNames.push('C-terminal');
|
|
54
|
+
|
|
55
|
+
// // filter out the columns with the same values
|
|
56
|
+
// if (filter) {
|
|
57
|
+
// splitColumns = splitColumns.filter((positionArray, index) => {
|
|
58
|
+
// const isRetained = new Set(positionArray).size > 1;
|
|
59
|
+
// if (!isRetained)
|
|
60
|
+
// columnNames.splice(index, 1);
|
|
61
|
+
|
|
62
|
+
// return isRetained;
|
|
63
|
+
// });
|
|
64
|
+
// }
|
|
65
|
+
|
|
66
|
+
// return [
|
|
67
|
+
// DG.DataFrame.fromColumns(splitColumns.map((positionArray, index) => {
|
|
68
|
+
// return DG.Column.fromList('string', columnNames[index], positionArray);
|
|
69
|
+
// })),
|
|
70
|
+
// invalidIndexes,
|
|
71
|
+
// ];
|
|
72
|
+
// }
|
|
@@ -4,8 +4,9 @@ import * as DG from 'datagrok-api/dg';
|
|
|
4
4
|
import $ from 'cash-dom';
|
|
5
5
|
|
|
6
6
|
import * as logojs from 'logojs-react';
|
|
7
|
-
import {splitAlignedPeptides} from '../utils/split-aligned';
|
|
7
|
+
// import {splitAlignedPeptides} from '../utils/split-aligned';
|
|
8
8
|
import {ChemPalette} from '../utils/chem-palette';
|
|
9
|
+
import {PeptidesController} from '../peptides';
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
12
|
* Logo viewer.
|
|
@@ -80,7 +81,7 @@ export class Logo extends DG.JsViewer {
|
|
|
80
81
|
this.initialized = true;
|
|
81
82
|
console.log('INIT');
|
|
82
83
|
this.target = this.dataFrame;
|
|
83
|
-
[this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
84
|
+
[this.splitted] = PeptidesController.splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
84
85
|
this.root.style.width = 'auto';
|
|
85
86
|
this.root.style.height = 'auto';
|
|
86
87
|
this.root.style.maxHeight = '200px';
|
|
@@ -93,9 +94,9 @@ export class Logo extends DG.JsViewer {
|
|
|
93
94
|
*/
|
|
94
95
|
onTableAttached() {
|
|
95
96
|
if (typeof this.dataFrame !== 'undefined') {
|
|
96
|
-
if (!this.initialized)
|
|
97
|
+
if (!this.initialized)
|
|
97
98
|
this.init();
|
|
98
|
-
|
|
99
|
+
|
|
99
100
|
|
|
100
101
|
this.subs.push(DG.debounce(this.dataFrame.selection.onChanged, 50).subscribe((_: any) => this.render()));
|
|
101
102
|
this.subs.push(DG.debounce(this.dataFrame.filter.onChanged, 50).subscribe((_: any) => this.render()));
|
|
@@ -141,14 +142,13 @@ export class Logo extends DG.JsViewer {
|
|
|
141
142
|
.whereRowMask(this.dataFrame!.selection)
|
|
142
143
|
.aggregate();
|
|
143
144
|
}
|
|
144
|
-
if (selected)
|
|
145
|
-
[this.splitted] = splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
146
|
-
|
|
145
|
+
if (selected)
|
|
146
|
+
[this.splitted] = PeptidesController.splitAlignedPeptides(this.target!.columns.bySemType(this.colSemType));
|
|
147
|
+
else [this.splitted] = PeptidesController.splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
|
|
147
148
|
$(this.root).empty();
|
|
148
149
|
|
|
149
|
-
if (typeof this.dataFrame !== 'undefined')
|
|
150
|
+
if (typeof this.dataFrame !== 'undefined')
|
|
150
151
|
this.findLogo();
|
|
151
|
-
}
|
|
152
152
|
}
|
|
153
153
|
|
|
154
154
|
/**
|
|
@@ -176,17 +176,16 @@ export class Logo extends DG.JsViewer {
|
|
|
176
176
|
for (let i = 0; i < col.length; i++) {
|
|
177
177
|
const c = col.get(i);
|
|
178
178
|
if (c != '-') {
|
|
179
|
-
if (c[1] == '(')
|
|
179
|
+
if (c[1] == '(')
|
|
180
180
|
this.ppm[index][this.PROT_NUMS[c.substr(0, 1).toUpperCase()]] += 1 / size;
|
|
181
|
-
|
|
181
|
+
else if (c.substr(0, 3) in ChemPalette.AAFullNames && (c.length == 3 || c.at(3) == '('))
|
|
182
182
|
this.ppm[index][this.PROT_NUMS[ChemPalette.AAFullNames[c.substr(0, 3)]]] += 1 / size;
|
|
183
|
-
|
|
183
|
+
else if (c.at(0)?.toLowerCase() == c.at(0) && c.substr(1, 3) in ChemPalette.AAFullNames &&
|
|
184
184
|
(c.length == 4 || c.at(4) == '(')
|
|
185
|
-
)
|
|
185
|
+
)
|
|
186
186
|
this.ppm[index][this.PROT_NUMS[ChemPalette.AAFullNames[c.substr(1, 3)]]] += 1 / size;
|
|
187
|
-
|
|
187
|
+
else
|
|
188
188
|
this.ppm[index][this.PROT_NUMS[c]] += 1 / size;
|
|
189
|
-
}
|
|
190
189
|
}
|
|
191
190
|
}
|
|
192
191
|
index++;
|