@datagrok/peptides 0.8.9 → 0.8.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -1
- package/dist/package-test.js +22626 -0
- package/dist/package.js +21429 -0
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8840 -0
- package/jest.config.js +33 -0
- package/package.json +75 -62
- package/src/__jest__/remote.test.ts +50 -0
- package/src/__jest__/test-node.ts +96 -0
- package/src/model.ts +950 -86
- package/src/monomer-library.ts +8 -0
- package/src/package-test.ts +3 -2
- package/src/package.ts +57 -22
- package/src/peptides.ts +165 -119
- package/src/styles.css +8 -0
- package/src/tests/peptides-tests.ts +17 -78
- package/src/tests/utils.ts +1 -7
- package/src/utils/SAR-multiple-filter.ts +439 -0
- package/src/utils/SAR-multiple-selection.ts +177 -0
- package/src/utils/cell-renderer.ts +49 -50
- package/src/utils/chem-palette.ts +61 -163
- package/src/utils/constants.ts +56 -0
- package/src/utils/filtering-statistics.ts +62 -0
- package/src/utils/multiple-sequence-alignment.ts +33 -2
- package/src/utils/multivariate-analysis.ts +79 -0
- package/src/utils/peptide-similarity-space.ts +12 -31
- package/src/utils/types.ts +10 -0
- package/src/viewers/logo-viewer.ts +2 -1
- package/src/viewers/peptide-space-viewer.ts +121 -0
- package/src/viewers/sar-viewer.ts +111 -313
- package/src/viewers/stacked-barchart-viewer.ts +126 -173
- package/src/widgets/analyze-peptides.ts +39 -18
- package/src/widgets/distribution.ts +61 -0
- package/src/widgets/manual-alignment.ts +3 -3
- package/src/widgets/peptide-molecule.ts +4 -4
- package/src/widgets/subst-table.ts +30 -22
- package/test-Peptides-f8114def7953-4bf59d70.html +256 -0
- package/src/describe.ts +0 -534
- package/src/utils/split-aligned.ts +0 -72
- package/src/viewers/subst-viewer.ts +0 -320
|
@@ -3,19 +3,14 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
6
|
-
import {_package} from '../package';
|
|
7
6
|
import {MonomerLibrary} from '../monomer-library';
|
|
8
7
|
|
|
9
|
-
|
|
10
|
-
* Chem palette class.
|
|
11
|
-
*
|
|
12
|
-
* @export
|
|
13
|
-
* @class ChemPalette
|
|
14
|
-
*/
|
|
8
|
+
|
|
15
9
|
export class ChemPalette {
|
|
16
10
|
cp: StringDictionary = {};
|
|
17
11
|
isInit: boolean = false;
|
|
18
12
|
monomerLib: MonomerLibrary | null = null;
|
|
13
|
+
|
|
19
14
|
/**
|
|
20
15
|
* Creates an instance of ChemPalette.
|
|
21
16
|
*
|
|
@@ -23,7 +18,7 @@ export class ChemPalette {
|
|
|
23
18
|
* @param {boolean} [grouping=false] Is grouping enabled.
|
|
24
19
|
* @memberof ChemPalette
|
|
25
20
|
*/
|
|
26
|
-
constructor(scheme: string, grouping = false) {
|
|
21
|
+
private constructor(scheme: string, grouping = false) {
|
|
27
22
|
if (scheme == 'grok')
|
|
28
23
|
this.cp = ChemPalette.getDatagrok(grouping);
|
|
29
24
|
}
|
|
@@ -34,16 +29,22 @@ export class ChemPalette {
|
|
|
34
29
|
* @param {DG.GridCell} cell Grid cell to show tooltip over.
|
|
35
30
|
* @param {number} x x coordinate of the mouse pointer.
|
|
36
31
|
* @param {number} y y coordinate of the mouse pointer.
|
|
32
|
+
* @param {MonomerLibrary} monomerLib Monomer Library instance
|
|
37
33
|
*/
|
|
38
|
-
|
|
39
|
-
if (!this.isInit)
|
|
40
|
-
|
|
34
|
+
static showTooltip(cell: DG.GridCell, x: number, y: number, monomerLib: MonomerLibrary) {
|
|
35
|
+
// if (!this.isInit) {
|
|
36
|
+
// const validPackage = _package ?? _packageTest;
|
|
37
|
+
// if (!validPackage)
|
|
38
|
+
// throw new Error('No package instance found');
|
|
39
|
+
// this.monomerLib = new MonomerLibrary(await validPackage.files.readAsText(`HELMMonomers_June10.sdf`));
|
|
40
|
+
// this.isInit = true;
|
|
41
|
+
// }
|
|
41
42
|
|
|
42
43
|
const s = cell.cell.value as string;
|
|
43
44
|
let toDisplay = [ui.divText(s)];
|
|
44
|
-
const [, aarOuter, aarInner] =
|
|
45
|
+
const [, aarOuter, aarInner] = ChemPalette.getColorAAPivot(s);
|
|
45
46
|
for (const aar of [aarOuter, aarInner]) {
|
|
46
|
-
if (
|
|
47
|
+
if (monomerLib.monomerNames.includes(aar)) {
|
|
47
48
|
if (aar in ChemPalette.AANames)
|
|
48
49
|
toDisplay = [ui.divText(ChemPalette.AANames[aar])];
|
|
49
50
|
|
|
@@ -55,7 +56,7 @@ export class ChemPalette {
|
|
|
55
56
|
autoCropMargin: 0,
|
|
56
57
|
suppressChiralText: true,
|
|
57
58
|
};
|
|
58
|
-
const sketch = grok.chem.svgMol(
|
|
59
|
+
const sketch = grok.chem.svgMol(monomerLib.getMonomerMol(aar), undefined, undefined, options);
|
|
59
60
|
if (toDisplay.length == 2)
|
|
60
61
|
toDisplay.push(ui.divText('Modified'));
|
|
61
62
|
|
|
@@ -65,109 +66,77 @@ export class ChemPalette {
|
|
|
65
66
|
ui.tooltip.show(ui.divV(toDisplay), x, y);
|
|
66
67
|
}
|
|
67
68
|
|
|
68
|
-
/**
|
|
69
|
-
* Get color for the provided amino acid residue.
|
|
70
|
-
* @param {string} c Amino acid residue string.
|
|
71
|
-
* @return {string} Color.
|
|
72
|
-
*/
|
|
73
|
-
getColor(c: string): string {
|
|
74
|
-
const [color] = this.getColorPivot(c);
|
|
75
|
-
return color;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
69
|
/**
|
|
79
70
|
* Returns divided amino with its content in the bracket; if the content is a number, then it is omitted
|
|
80
71
|
*
|
|
81
72
|
* @param {string} c raw amino
|
|
82
73
|
* @return {[string, string]} outer and inner content
|
|
83
74
|
*/
|
|
84
|
-
|
|
75
|
+
static getInnerOuter(c: string): [string, string] {
|
|
85
76
|
let isInner = 0;
|
|
86
77
|
let inner = '';
|
|
87
78
|
let outer = '';
|
|
88
79
|
|
|
89
|
-
for (
|
|
90
|
-
if (
|
|
80
|
+
for (const char of c) {
|
|
81
|
+
if (char == '(')
|
|
91
82
|
isInner++;
|
|
92
|
-
else if (
|
|
83
|
+
else if (char == ')')
|
|
93
84
|
isInner--;
|
|
94
85
|
else if (isInner)
|
|
95
|
-
inner +=
|
|
86
|
+
inner += char;
|
|
96
87
|
else
|
|
97
|
-
outer +=
|
|
88
|
+
outer += char;
|
|
98
89
|
}
|
|
99
90
|
|
|
100
91
|
return !isNaN(parseInt(inner)) ? [outer, ''] : [outer, inner];
|
|
101
92
|
}
|
|
102
93
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
let [outerC, innerC] = this.getInnerOuter(c);
|
|
110
|
-
outerC = (outerC.length > 6 ? outerC.slice(0, 3) + '...' : outerC);
|
|
111
|
-
innerC = (innerC.length > 6 ? innerC.slice(0, 3) + '...' : innerC);
|
|
94
|
+
static getColorAAPivot(monomer: string = '', scheme: 'grok' = 'grok'): [string, string, string, number] {
|
|
95
|
+
// const chemPaletteInstance = ChemPalette.getDatagrok();
|
|
96
|
+
const chemPaletteInstance = ChemPalette.getPalette(scheme);
|
|
97
|
+
let [outerMonomer, innerMonomer] = ChemPalette.getInnerOuter(monomer);
|
|
98
|
+
outerMonomer = (outerMonomer.length > 6 ? `${outerMonomer.slice(0, 3)}...` : outerMonomer);
|
|
99
|
+
innerMonomer = (innerMonomer.length > 6 ? `${innerMonomer.slice(0, 3)}...` : innerMonomer);
|
|
112
100
|
|
|
113
|
-
if (
|
|
114
|
-
const amino =
|
|
115
|
-
return amino in
|
|
116
|
-
[
|
|
117
|
-
[ChemPalette.undefinedColor,
|
|
101
|
+
if (monomer.length == 1 || monomer[1] == '(') {
|
|
102
|
+
const amino = monomer[0]?.toUpperCase()!;
|
|
103
|
+
return amino in chemPaletteInstance ?
|
|
104
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 1]:
|
|
105
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 1];
|
|
118
106
|
}
|
|
119
107
|
|
|
120
|
-
if (
|
|
121
|
-
if (
|
|
122
|
-
const amino =
|
|
123
|
-
return amino in
|
|
124
|
-
[
|
|
125
|
-
[ChemPalette.undefinedColor,
|
|
108
|
+
if (monomer[0] == 'd' && monomer[1]! in chemPaletteInstance) {
|
|
109
|
+
if (monomer.length == 2 || monomer[2] == '(') {
|
|
110
|
+
const amino = monomer[1]?.toUpperCase()!;
|
|
111
|
+
return amino in chemPaletteInstance ?
|
|
112
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 2]:
|
|
113
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 2];
|
|
126
114
|
}
|
|
127
115
|
}
|
|
128
116
|
|
|
129
|
-
if (
|
|
130
|
-
if (
|
|
131
|
-
const amino = ChemPalette.AAFullNames[
|
|
132
|
-
return amino in
|
|
133
|
-
[
|
|
134
|
-
[ChemPalette.undefinedColor,
|
|
117
|
+
if (monomer.substring(0, 3) in ChemPalette.AAFullNames) {
|
|
118
|
+
if (monomer.length == 3 || monomer[3] == '(') {
|
|
119
|
+
const amino = ChemPalette.AAFullNames[monomer.substring(0, 3)];
|
|
120
|
+
return amino in chemPaletteInstance ?
|
|
121
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 3]:
|
|
122
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 3];
|
|
135
123
|
}
|
|
136
124
|
}
|
|
137
125
|
|
|
138
|
-
if (
|
|
139
|
-
if (
|
|
140
|
-
if (
|
|
141
|
-
const amino = ChemPalette.AAFullNames[
|
|
142
|
-
return amino in
|
|
143
|
-
[
|
|
144
|
-
[ChemPalette.undefinedColor,
|
|
126
|
+
if (monomer[0]?.toLowerCase() == monomer[0]) {
|
|
127
|
+
if (monomer.substring(1, 3) in ChemPalette.AAFullNames) {
|
|
128
|
+
if (monomer.length == 4 || monomer[4] == '(') {
|
|
129
|
+
const amino = ChemPalette.AAFullNames[monomer.substring(1, 3)];
|
|
130
|
+
return amino in chemPaletteInstance ?
|
|
131
|
+
[chemPaletteInstance[amino], amino, innerMonomer, 4]:
|
|
132
|
+
[ChemPalette.undefinedColor, outerMonomer, innerMonomer, 4];
|
|
145
133
|
}
|
|
146
134
|
}
|
|
147
135
|
}
|
|
148
136
|
|
|
149
|
-
return [ChemPalette.undefinedColor,
|
|
137
|
+
return [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 0];
|
|
150
138
|
}
|
|
151
139
|
|
|
152
|
-
/**
|
|
153
|
-
* Get color pivot.
|
|
154
|
-
*
|
|
155
|
-
* @param c
|
|
156
|
-
* @returns
|
|
157
|
-
*/
|
|
158
|
-
getColorPivot(c = ''): [string, number] {
|
|
159
|
-
//TODO: merge with getColorAAPivot?
|
|
160
|
-
const [color,,, pivot] = this.getColorAAPivot(c);
|
|
161
|
-
return [color, pivot];
|
|
162
|
-
};
|
|
163
|
-
|
|
164
|
-
/**
|
|
165
|
-
* Color palette
|
|
166
|
-
*
|
|
167
|
-
* @static
|
|
168
|
-
* @type {{[key: string]: string[]}}
|
|
169
|
-
* @memberof ChemPalette
|
|
170
|
-
*/
|
|
171
140
|
static colourPalette: {[key: string]: string[]} = {
|
|
172
141
|
'orange': ['rgb(255,187,120)', 'rgb(245,167,100)', 'rgb(235,137,70)', 'rgb(205, 111, 71)'],
|
|
173
142
|
'all_green': ['rgb(44,160,44)', 'rgb(74,160,74)', 'rgb(23,103,57)', 'rgb(30,110,96)', 'rgb(60,131,95)',
|
|
@@ -191,13 +160,6 @@ export class ChemPalette {
|
|
|
191
160
|
'white': ['rgb(230,230,230)'],
|
|
192
161
|
};
|
|
193
162
|
|
|
194
|
-
/**
|
|
195
|
-
* Grok color scheme groups.
|
|
196
|
-
*
|
|
197
|
-
* @static
|
|
198
|
-
* @type {{[key: string]: string[]}}
|
|
199
|
-
* @memberof ChemPalette
|
|
200
|
-
*/
|
|
201
163
|
static grokGroups: {[key: string]: string[]} = {
|
|
202
164
|
'yellow': ['C', 'U'],
|
|
203
165
|
'red': ['G', 'P'],
|
|
@@ -207,38 +169,9 @@ export class ChemPalette {
|
|
|
207
169
|
'orange': ['S', 'T', 'N', 'Q'],
|
|
208
170
|
};
|
|
209
171
|
|
|
210
|
-
/**
|
|
211
|
-
* Lesk color scheme groups.
|
|
212
|
-
*
|
|
213
|
-
* @static
|
|
214
|
-
* @type {{[key: string]: string[]}}
|
|
215
|
-
* @memberof ChemPalette
|
|
216
|
-
*/
|
|
217
|
-
static leskGroups: {[key: string]: string[]} = {
|
|
218
|
-
'orange': ['G', 'A', 'S', 'T'],
|
|
219
|
-
'all_green': ['C', 'V', 'I', 'L', 'P', 'F', 'Y', 'M', 'W'],
|
|
220
|
-
'magenta': ['N', 'Q', 'H'],
|
|
221
|
-
'red': ['D', 'E'],
|
|
222
|
-
'all_blue': ['K', 'R'],
|
|
223
|
-
};
|
|
224
|
-
|
|
225
|
-
/**
|
|
226
|
-
* Undefined color.
|
|
227
|
-
*
|
|
228
|
-
* @static
|
|
229
|
-
* @memberof ChemPalette
|
|
230
|
-
*/
|
|
231
172
|
static undefinedColor = 'rgb(100,100,100)';
|
|
232
173
|
|
|
233
|
-
|
|
234
|
-
* Create palette.
|
|
235
|
-
*
|
|
236
|
-
* @param dt
|
|
237
|
-
* @param simplified Is simplified.
|
|
238
|
-
* @param grouping Is grouping enabled.
|
|
239
|
-
* @returns
|
|
240
|
-
*/
|
|
241
|
-
static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false) {
|
|
174
|
+
static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false): StringDictionary {
|
|
242
175
|
const palette: { [key: string]: string } = {};
|
|
243
176
|
const groups = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
|
|
244
177
|
let currentGroup = 0;
|
|
@@ -251,13 +184,6 @@ export class ChemPalette {
|
|
|
251
184
|
return palette;
|
|
252
185
|
}
|
|
253
186
|
|
|
254
|
-
/**
|
|
255
|
-
* Amino acid residue names.
|
|
256
|
-
*
|
|
257
|
-
* @static
|
|
258
|
-
* @type {StringDictionary}
|
|
259
|
-
* @memberof ChemPalette
|
|
260
|
-
*/
|
|
261
187
|
static AANames: StringDictionary = {
|
|
262
188
|
'G': 'Glycine',
|
|
263
189
|
'L': 'Leucine',
|
|
@@ -281,13 +207,6 @@ export class ChemPalette {
|
|
|
281
207
|
'T': 'Threonine',
|
|
282
208
|
};
|
|
283
209
|
|
|
284
|
-
/**
|
|
285
|
-
* Amino acid residue SMILES.
|
|
286
|
-
*
|
|
287
|
-
* @static
|
|
288
|
-
* @type {StringDictionary}
|
|
289
|
-
* @memberof ChemPalette
|
|
290
|
-
*/
|
|
291
210
|
static AASmiles: StringDictionary = {
|
|
292
211
|
'G': 'NCC(=O)O',
|
|
293
212
|
'L': 'N[C@H](CC(C)C)C(=O)O',
|
|
@@ -311,13 +230,6 @@ export class ChemPalette {
|
|
|
311
230
|
'T': 'NC(C(O)C)C(=O)O',
|
|
312
231
|
};
|
|
313
232
|
|
|
314
|
-
/**
|
|
315
|
-
* Amino acid residue truncated SMILES.
|
|
316
|
-
*
|
|
317
|
-
* @static
|
|
318
|
-
* @type {StringDictionary}
|
|
319
|
-
* @memberof ChemPalette
|
|
320
|
-
*/
|
|
321
233
|
static AASmilesTruncated: StringDictionary = {
|
|
322
234
|
'G': '*C*',
|
|
323
235
|
'L': 'CC(C)C[C@H](*)*',
|
|
@@ -341,13 +253,6 @@ export class ChemPalette {
|
|
|
341
253
|
'T': 'CC(O)C(*)*',
|
|
342
254
|
};
|
|
343
255
|
|
|
344
|
-
/**
|
|
345
|
-
* Amino acid residue full names.
|
|
346
|
-
*
|
|
347
|
-
* @static
|
|
348
|
-
* @type {StringDictionary}
|
|
349
|
-
* @memberof ChemPalette
|
|
350
|
-
*/
|
|
351
256
|
static AAFullNames: StringDictionary = {
|
|
352
257
|
'Ala': 'A',
|
|
353
258
|
'Arg': 'R',
|
|
@@ -371,23 +276,16 @@ export class ChemPalette {
|
|
|
371
276
|
'Val': 'V',
|
|
372
277
|
};
|
|
373
278
|
|
|
374
|
-
|
|
375
|
-
* Get Datagrok palette.
|
|
376
|
-
*
|
|
377
|
-
* @param grouping Is grouping enabled?
|
|
378
|
-
* @returns
|
|
379
|
-
*/
|
|
380
|
-
static getDatagrok(grouping = false) {
|
|
279
|
+
static getDatagrok(grouping = false): StringDictionary {
|
|
381
280
|
return ChemPalette.makePalette(ChemPalette.grokGroups, false, grouping);
|
|
382
281
|
}
|
|
383
282
|
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
return ChemPalette.makePalette(ChemPalette.leskGroups);
|
|
283
|
+
static getPalette(scheme: 'grok'): StringDictionary {
|
|
284
|
+
switch (scheme) {
|
|
285
|
+
case 'grok':
|
|
286
|
+
return ChemPalette.getDatagrok();
|
|
287
|
+
default:
|
|
288
|
+
throw new Error(`ChemPalette: scheme \`${scheme}\` does not exist`);
|
|
289
|
+
}
|
|
392
290
|
}
|
|
393
291
|
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
export enum COLUMNS_NAMES {
|
|
2
|
+
SPLIT_COL = '~split',
|
|
3
|
+
ACTIVITY = '~activity',
|
|
4
|
+
ACTIVITY_SCALED = 'activity_scaled',
|
|
5
|
+
ALIGNED_SEQUENCE = '~aligned_sequence',
|
|
6
|
+
AMINO_ACID_RESIDUE = 'AAR',
|
|
7
|
+
POSITION = 'Pos',
|
|
8
|
+
P_VALUE = 'pValue',
|
|
9
|
+
MEAN_DIFFERENCE = 'Mean difference',
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export enum CATEGORIES {
|
|
13
|
+
OTHER = 'Other',
|
|
14
|
+
ALL = 'All',
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export enum TAGS {
|
|
18
|
+
AAR = 'AAR',
|
|
19
|
+
POSITION = 'Pos',
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export enum SEM_TYPES {
|
|
23
|
+
AMINO_ACIDS = 'aminoAcids',
|
|
24
|
+
ALIGNED_SEQUENCE = 'alignedSequence',
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export const STATS = 'stats';
|
|
28
|
+
|
|
29
|
+
export const EMBEDDING_STATUS = 'embeddingStatus';
|
|
30
|
+
|
|
31
|
+
export const PEPTIDES_ANALYSIS = 'isPeptidesAnalysis';
|
|
32
|
+
|
|
33
|
+
export enum FLAGS {
|
|
34
|
+
CELL_CHANGING = 'isCellChanging',
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export const aarGroups = {
|
|
38
|
+
'R': 'PC', 'H': 'PC', 'K': 'PC',
|
|
39
|
+
'D': 'NC', 'E': 'NC',
|
|
40
|
+
'S': 'U', 'T': 'U', 'N': 'U', 'Q': 'U',
|
|
41
|
+
'C': 'SC', 'U': 'SC', 'G': 'SC', 'P': 'SC',
|
|
42
|
+
'A': 'H', 'V': 'H', 'I': 'H', 'L': 'H', 'M': 'H', 'F': 'H', 'Y': 'H', 'W': 'H',
|
|
43
|
+
'-': '-',
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
export const groupDescription: {[key: string]: {'description': string, aminoAcids: string[]}} = {
|
|
47
|
+
'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
|
|
48
|
+
'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
|
|
49
|
+
'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
|
|
50
|
+
'SC': {'description': 'Special Cases', 'aminoAcids': ['C', 'U', 'G', 'P']},
|
|
51
|
+
'H': {
|
|
52
|
+
'description': 'Amino Acids with Hydrophobic Side Chain',
|
|
53
|
+
'aminoAcids': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
|
|
54
|
+
},
|
|
55
|
+
'-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
|
|
56
|
+
};
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
|
|
3
|
+
import {tTest} from '@datagrok-libraries/statistics/src/tests';
|
|
4
|
+
|
|
5
|
+
/** Column statistics helper. */
|
|
6
|
+
export class FilteringStatistics {
|
|
7
|
+
private data?: Float32Array;
|
|
8
|
+
private stats: Stats = {
|
|
9
|
+
count: 0,
|
|
10
|
+
pValue: 1.,
|
|
11
|
+
meanDifference: 0.,
|
|
12
|
+
};
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Creates an instance of FilteringStatistics.
|
|
16
|
+
* @param {Float32Array} [data] Numeric values to consider.
|
|
17
|
+
*/
|
|
18
|
+
constructor(data?: Float32Array) {this.data = data;}
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Sets values to make statistical analysis.
|
|
22
|
+
* @param {Float32Array} data Those values.
|
|
23
|
+
*/
|
|
24
|
+
setData(data: Float32Array) {this.data = data;}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Sets bit mask to split population into two groups.
|
|
28
|
+
* @param {DG.BitSet} mask The mask to perform splitting.
|
|
29
|
+
*/
|
|
30
|
+
setMask(mask: DG.BitSet) {
|
|
31
|
+
if (!this.data)
|
|
32
|
+
return;
|
|
33
|
+
const selected = this.data.filter((_, i) => mask.get(i));
|
|
34
|
+
const rest = this.data.filter((_, i) => !mask.get(i));
|
|
35
|
+
this.stats = this.calcStats(selected, rest);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Calculates simple statistics on two samples.
|
|
40
|
+
* @param {Float32Array} selected First sample.
|
|
41
|
+
* @param {Float32Array} rest Second sample.
|
|
42
|
+
* @return {Stats} Statistics.
|
|
43
|
+
*/
|
|
44
|
+
calcStats(selected: Float32Array, rest: Float32Array): Stats {
|
|
45
|
+
const testResult = tTest(selected, rest);
|
|
46
|
+
const currentMeanDiff = testResult['Mean difference']!;
|
|
47
|
+
return {
|
|
48
|
+
count: selected.length,
|
|
49
|
+
pValue: testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'],
|
|
50
|
+
meanDifference: currentMeanDiff,
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Returns calculated statistics. */
|
|
55
|
+
get result(): Stats {return this.stats;}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
export type Stats = {
|
|
59
|
+
count: number,
|
|
60
|
+
pValue: number,
|
|
61
|
+
meanDifference: number,
|
|
62
|
+
};
|
|
@@ -7,6 +7,8 @@ import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encod
|
|
|
7
7
|
//@ts-ignore
|
|
8
8
|
import {SEMTYPE} from '../semantics';
|
|
9
9
|
|
|
10
|
+
// let CLI: any = undefined;
|
|
11
|
+
|
|
10
12
|
/**
|
|
11
13
|
* Converts array of sequences into simple fasta string.
|
|
12
14
|
*
|
|
@@ -73,16 +75,45 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
|
|
|
73
75
|
sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
|
|
74
76
|
|
|
75
77
|
const fasta = _stringsToFasta(sequences);
|
|
78
|
+
const CLI = await new Aioli({
|
|
79
|
+
tool: 'kalign',
|
|
80
|
+
version: '3.3.1',
|
|
81
|
+
reinit: true,
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
// if (!CLI) {
|
|
85
|
+
// CLI = await new Aioli('kalign/3.3.1');
|
|
86
|
+
// console.info('kalign CLI was first initialized.');
|
|
87
|
+
// } else
|
|
88
|
+
// console.info('Initialized kalign CLI was reused.');
|
|
89
|
+
|
|
90
|
+
console.log(['fasta.length =', fasta.length]);
|
|
76
91
|
|
|
77
|
-
const CLI = await new Aioli('kalign/3.3.1');
|
|
78
92
|
await CLI.fs.writeFile('input.fa', fasta);
|
|
79
|
-
const output = await CLI.exec(
|
|
93
|
+
const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
|
|
80
94
|
const buf = await CLI.cat('result.fasta');
|
|
81
95
|
|
|
82
96
|
console.warn(output);
|
|
83
97
|
|
|
98
|
+
// if (!buf)
|
|
99
|
+
// console.warn(buf);
|
|
100
|
+
|
|
84
101
|
const aligned = _fastaToStrings(buf).slice(0, sequences.length);
|
|
85
102
|
const alignedCol = DG.Column.fromStrings(`(${col.name})msa`, _stringsToAligned(aligned));
|
|
86
103
|
alignedCol.semType = SEMTYPE.ALIGNED;
|
|
87
104
|
return alignedCol;
|
|
88
105
|
}
|
|
106
|
+
|
|
107
|
+
export async function testMSAEnoughMemory(col: DG.Column) {
|
|
108
|
+
const sequencesCount = col.length;
|
|
109
|
+
const delta = sequencesCount/100;
|
|
110
|
+
|
|
111
|
+
for (let i = delta; i < sequencesCount; i += delta) {
|
|
112
|
+
try {
|
|
113
|
+
await runKalign(DG.Column.fromStrings(col.name, col.toList().slice(0, Math.round(i))));
|
|
114
|
+
console.log(`runKalign succeeded on ${i}`);
|
|
115
|
+
} catch (error) {
|
|
116
|
+
console.log(`runKalign failed on ${i} with '${error}'`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
import * as C from './constants';
|
|
5
|
+
|
|
6
|
+
import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
|
|
7
|
+
|
|
8
|
+
export async function callMVA(
|
|
9
|
+
tableGrid: DG.Grid,
|
|
10
|
+
view: DG.View,
|
|
11
|
+
currentDf: DG.DataFrame,
|
|
12
|
+
options: {[name: string]: string},
|
|
13
|
+
sequencesCol: DG.Column,
|
|
14
|
+
) {
|
|
15
|
+
const activityCol = await _scaleColumn(currentDf.getCol(options['activityColumnName']), options['scaling']);
|
|
16
|
+
const encDf = _encodeSequences(sequencesCol);
|
|
17
|
+
// const scaledColName = `${options['activityColumnName']}scaled`;
|
|
18
|
+
|
|
19
|
+
_insertColumns(
|
|
20
|
+
currentDf,
|
|
21
|
+
[DG.Column.fromList('double', C.COLUMNS_NAMES.ACTIVITY_SCALED, activityCol.toList())],
|
|
22
|
+
);
|
|
23
|
+
_insertColumns(currentDf, encDf.columns);
|
|
24
|
+
|
|
25
|
+
const res = await grok.functions.call('MultivariateAnalysis', {
|
|
26
|
+
table: currentDf,
|
|
27
|
+
features: encDf.columns.names(),
|
|
28
|
+
prediction: C.COLUMNS_NAMES.ACTIVITY_SCALED,
|
|
29
|
+
components: 10,
|
|
30
|
+
showScores: true,
|
|
31
|
+
showRegresCoefs: true,
|
|
32
|
+
});
|
|
33
|
+
console.log(res);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* Encodes a series of sequences into a certain scale.
|
|
38
|
+
*
|
|
39
|
+
* @param {DG.Column} sequencesCol Column containing the sequences.
|
|
40
|
+
* @return {DG.DataFrame} The data frame with sequences encoded.
|
|
41
|
+
*/
|
|
42
|
+
function _encodeSequences(sequencesCol: DG.Column): DG.DataFrame {
|
|
43
|
+
const nRows = sequencesCol.length;
|
|
44
|
+
const nCols = AlignedSequenceEncoder.clean(sequencesCol.get(0)).length;
|
|
45
|
+
const enc = new AlignedSequenceEncoder('WimleyWhite');
|
|
46
|
+
const positions = new Array(nCols).fill(0).map((_) => new Float32Array(nRows));
|
|
47
|
+
|
|
48
|
+
for (let j = 0; j < nRows; ++j) {
|
|
49
|
+
const s = AlignedSequenceEncoder.clean(sequencesCol.get(j));
|
|
50
|
+
for (let i = 0; i < nCols; ++i)
|
|
51
|
+
positions[i][j] = enc.encodeLettter(s[i]);
|
|
52
|
+
}
|
|
53
|
+
const df = DG.DataFrame.fromColumns(positions.map(
|
|
54
|
+
(v, i) => DG.Column.fromFloat32Array((i+1).toString(), v),
|
|
55
|
+
));
|
|
56
|
+
return df;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
async function _scaleColumn(column: DG.Column, method: string): Promise<DG.Column> {
|
|
60
|
+
if (method == 'none')
|
|
61
|
+
return column;
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
const formula = (method.startsWith('-') ? '0-' : '')+'Log10(${'+column.name+'})';
|
|
65
|
+
const newCol = await column.applyFormula(formula);
|
|
66
|
+
|
|
67
|
+
if (newCol == null)
|
|
68
|
+
throw new Error('Column formula returned unexpected null.');
|
|
69
|
+
|
|
70
|
+
return newCol!;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
function _insertColumns(targetDf: DG.DataFrame, columns: DG.Column[]): DG.DataFrame {
|
|
74
|
+
for (const col of columns)
|
|
75
|
+
targetDf.columns.add(col);
|
|
76
|
+
|
|
77
|
+
return targetDf;
|
|
78
|
+
}
|
|
79
|
+
|