@datagrok/peptides 0.8.9 → 0.8.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/.eslintrc.json +2 -1
  2. package/dist/package-test.js +22626 -0
  3. package/dist/package.js +21429 -0
  4. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +8840 -0
  5. package/jest.config.js +33 -0
  6. package/package.json +75 -62
  7. package/src/__jest__/remote.test.ts +50 -0
  8. package/src/__jest__/test-node.ts +96 -0
  9. package/src/model.ts +950 -86
  10. package/src/monomer-library.ts +8 -0
  11. package/src/package-test.ts +3 -2
  12. package/src/package.ts +57 -22
  13. package/src/peptides.ts +165 -119
  14. package/src/styles.css +8 -0
  15. package/src/tests/peptides-tests.ts +17 -78
  16. package/src/tests/utils.ts +1 -7
  17. package/src/utils/SAR-multiple-filter.ts +439 -0
  18. package/src/utils/SAR-multiple-selection.ts +177 -0
  19. package/src/utils/cell-renderer.ts +49 -50
  20. package/src/utils/chem-palette.ts +61 -163
  21. package/src/utils/constants.ts +56 -0
  22. package/src/utils/filtering-statistics.ts +62 -0
  23. package/src/utils/multiple-sequence-alignment.ts +33 -2
  24. package/src/utils/multivariate-analysis.ts +79 -0
  25. package/src/utils/peptide-similarity-space.ts +12 -31
  26. package/src/utils/types.ts +10 -0
  27. package/src/viewers/logo-viewer.ts +2 -1
  28. package/src/viewers/peptide-space-viewer.ts +121 -0
  29. package/src/viewers/sar-viewer.ts +111 -313
  30. package/src/viewers/stacked-barchart-viewer.ts +126 -173
  31. package/src/widgets/analyze-peptides.ts +39 -18
  32. package/src/widgets/distribution.ts +61 -0
  33. package/src/widgets/manual-alignment.ts +3 -3
  34. package/src/widgets/peptide-molecule.ts +4 -4
  35. package/src/widgets/subst-table.ts +30 -22
  36. package/test-Peptides-f8114def7953-4bf59d70.html +256 -0
  37. package/src/describe.ts +0 -534
  38. package/src/utils/split-aligned.ts +0 -72
  39. package/src/viewers/subst-viewer.ts +0 -320
@@ -3,19 +3,14 @@ import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
6
- import {_package} from '../package';
7
6
  import {MonomerLibrary} from '../monomer-library';
8
7
 
9
- /**
10
- * Chem palette class.
11
- *
12
- * @export
13
- * @class ChemPalette
14
- */
8
+
15
9
  export class ChemPalette {
16
10
  cp: StringDictionary = {};
17
11
  isInit: boolean = false;
18
12
  monomerLib: MonomerLibrary | null = null;
13
+
19
14
  /**
20
15
  * Creates an instance of ChemPalette.
21
16
  *
@@ -23,7 +18,7 @@ export class ChemPalette {
23
18
  * @param {boolean} [grouping=false] Is grouping enabled.
24
19
  * @memberof ChemPalette
25
20
  */
26
- constructor(scheme: string, grouping = false) {
21
+ private constructor(scheme: string, grouping = false) {
27
22
  if (scheme == 'grok')
28
23
  this.cp = ChemPalette.getDatagrok(grouping);
29
24
  }
@@ -34,16 +29,22 @@ export class ChemPalette {
34
29
  * @param {DG.GridCell} cell Grid cell to show tooltip over.
35
30
  * @param {number} x x coordinate of the mouse pointer.
36
31
  * @param {number} y y coordinate of the mouse pointer.
32
+ * @param {MonomerLibrary} monomerLib Monomer Library instance
37
33
  */
38
- async showTooltip(cell: DG.GridCell, x: number, y: number) {
39
- if (!this.isInit)
40
- this.monomerLib = new MonomerLibrary(await _package.files.readAsText(`HELMMonomers_June10.sdf`));
34
+ static showTooltip(cell: DG.GridCell, x: number, y: number, monomerLib: MonomerLibrary) {
35
+ // if (!this.isInit) {
36
+ // const validPackage = _package ?? _packageTest;
37
+ // if (!validPackage)
38
+ // throw new Error('No package instance found');
39
+ // this.monomerLib = new MonomerLibrary(await validPackage.files.readAsText(`HELMMonomers_June10.sdf`));
40
+ // this.isInit = true;
41
+ // }
41
42
 
42
43
  const s = cell.cell.value as string;
43
44
  let toDisplay = [ui.divText(s)];
44
- const [, aarOuter, aarInner] = this.getColorAAPivot(s);
45
+ const [, aarOuter, aarInner] = ChemPalette.getColorAAPivot(s);
45
46
  for (const aar of [aarOuter, aarInner]) {
46
- if (this.monomerLib!.monomerNames.includes(aar)) {
47
+ if (monomerLib.monomerNames.includes(aar)) {
47
48
  if (aar in ChemPalette.AANames)
48
49
  toDisplay = [ui.divText(ChemPalette.AANames[aar])];
49
50
 
@@ -55,7 +56,7 @@ export class ChemPalette {
55
56
  autoCropMargin: 0,
56
57
  suppressChiralText: true,
57
58
  };
58
- const sketch = grok.chem.svgMol(this.monomerLib!.getMonomerMol(aar), undefined, undefined, options);
59
+ const sketch = grok.chem.svgMol(monomerLib.getMonomerMol(aar), undefined, undefined, options);
59
60
  if (toDisplay.length == 2)
60
61
  toDisplay.push(ui.divText('Modified'));
61
62
 
@@ -65,109 +66,77 @@ export class ChemPalette {
65
66
  ui.tooltip.show(ui.divV(toDisplay), x, y);
66
67
  }
67
68
 
68
- /**
69
- * Get color for the provided amino acid residue.
70
- * @param {string} c Amino acid residue string.
71
- * @return {string} Color.
72
- */
73
- getColor(c: string): string {
74
- const [color] = this.getColorPivot(c);
75
- return color;
76
- }
77
-
78
69
  /**
79
70
  * Returns the amino acid divided into its outer part and the content inside its brackets; if that content is a number, it is omitted
80
71
  *
81
72
  * @param {string} c raw amino
82
73
  * @return {[string, string]} outer and inner content
83
74
  */
84
- private getInnerOuter(c: string): [string, string] {
75
+ static getInnerOuter(c: string): [string, string] {
85
76
  let isInner = 0;
86
77
  let inner = '';
87
78
  let outer = '';
88
79
 
89
- for (let i = 0; i < c.length; ++i) {
90
- if (c[i] == '(')
80
+ for (const char of c) {
81
+ if (char == '(')
91
82
  isInner++;
92
- else if (c[i] == ')')
83
+ else if (char == ')')
93
84
  isInner--;
94
85
  else if (isInner)
95
- inner += c[i];
86
+ inner += char;
96
87
  else
97
- outer += c[i];
88
+ outer += char;
98
89
  }
99
90
 
100
91
  return !isNaN(parseInt(inner)) ? [outer, ''] : [outer, inner];
101
92
  }
102
93
 
103
- /**
104
- * Get color for the provided amino acid residue pivot
105
- * @param {string} [c=''] Amino acid residue string.
106
- * @return {[string, string, number]}
107
- */
108
- getColorAAPivot(c: string = ''): [string, string, string, number] {
109
- let [outerC, innerC] = this.getInnerOuter(c);
110
- outerC = (outerC.length > 6 ? outerC.slice(0, 3) + '...' : outerC);
111
- innerC = (innerC.length > 6 ? innerC.slice(0, 3) + '...' : innerC);
94
+ static getColorAAPivot(monomer: string = '', scheme: 'grok' = 'grok'): [string, string, string, number] {
95
+ // const chemPaletteInstance = ChemPalette.getDatagrok();
96
+ const chemPaletteInstance = ChemPalette.getPalette(scheme);
97
+ let [outerMonomer, innerMonomer] = ChemPalette.getInnerOuter(monomer);
98
+ outerMonomer = (outerMonomer.length > 6 ? `${outerMonomer.slice(0, 3)}...` : outerMonomer);
99
+ innerMonomer = (innerMonomer.length > 6 ? `${innerMonomer.slice(0, 3)}...` : innerMonomer);
112
100
 
113
- if (c.length == 1 || c[1] == '(') {
114
- const amino = c[0]?.toUpperCase()!;
115
- return amino in this.cp?
116
- [this.cp[amino], amino, innerC, 1]:
117
- [ChemPalette.undefinedColor, outerC, innerC, 1];
101
+ if (monomer.length == 1 || monomer[1] == '(') {
102
+ const amino = monomer[0]?.toUpperCase()!;
103
+ return amino in chemPaletteInstance ?
104
+ [chemPaletteInstance[amino], amino, innerMonomer, 1]:
105
+ [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 1];
118
106
  }
119
107
 
120
- if (c[0] == 'd' && c[1]! in this.cp) {
121
- if (c.length == 2 || c[2] == '(') {
122
- const amino = c[1]?.toUpperCase()!;
123
- return amino in this.cp?
124
- [this.cp[amino], amino, innerC, 2]:
125
- [ChemPalette.undefinedColor, outerC, innerC, 2];
108
+ if (monomer[0] == 'd' && monomer[1]! in chemPaletteInstance) {
109
+ if (monomer.length == 2 || monomer[2] == '(') {
110
+ const amino = monomer[1]?.toUpperCase()!;
111
+ return amino in chemPaletteInstance ?
112
+ [chemPaletteInstance[amino], amino, innerMonomer, 2]:
113
+ [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 2];
126
114
  }
127
115
  }
128
116
 
129
- if (c.substr(0, 3) in ChemPalette.AAFullNames) {
130
- if (c.length == 3 || c[3] == '(') {
131
- const amino = ChemPalette.AAFullNames[c.substr(0, 3)];
132
- return amino in this.cp?
133
- [this.cp[amino], amino, innerC, 3]:
134
- [ChemPalette.undefinedColor, outerC, innerC, 3];
117
+ if (monomer.substring(0, 3) in ChemPalette.AAFullNames) {
118
+ if (monomer.length == 3 || monomer[3] == '(') {
119
+ const amino = ChemPalette.AAFullNames[monomer.substring(0, 3)];
120
+ return amino in chemPaletteInstance ?
121
+ [chemPaletteInstance[amino], amino, innerMonomer, 3]:
122
+ [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 3];
135
123
  }
136
124
  }
137
125
 
138
- if (c[0]?.toLowerCase() == c[0]) {
139
- if (c.substr(1, 3) in ChemPalette.AAFullNames) {
140
- if (c.length == 4 || c[4] == '(') {
141
- const amino = ChemPalette.AAFullNames[c.substr(1, 3)];
142
- return amino in this.cp?
143
- [this.cp[amino], amino, innerC, 4]:
144
- [ChemPalette.undefinedColor, outerC, innerC, 4];
126
+ if (monomer[0]?.toLowerCase() == monomer[0]) {
127
+ if (monomer.substring(1, 3) in ChemPalette.AAFullNames) {
128
+ if (monomer.length == 4 || monomer[4] == '(') {
129
+ const amino = ChemPalette.AAFullNames[monomer.substring(1, 3)];
130
+ return amino in chemPaletteInstance ?
131
+ [chemPaletteInstance[amino], amino, innerMonomer, 4]:
132
+ [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 4];
145
133
  }
146
134
  }
147
135
  }
148
136
 
149
- return [ChemPalette.undefinedColor, outerC, innerC, 0];
137
+ return [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 0];
150
138
  }
151
139
 
152
- /**
153
- * Get color pivot.
154
- *
155
- * @param c
156
- * @returns
157
- */
158
- getColorPivot(c = ''): [string, number] {
159
- //TODO: merge with getColorAAPivot?
160
- const [color,,, pivot] = this.getColorAAPivot(c);
161
- return [color, pivot];
162
- };
163
-
164
- /**
165
- * Color palette
166
- *
167
- * @static
168
- * @type {{[key: string]: string[]}}
169
- * @memberof ChemPalette
170
- */
171
140
  static colourPalette: {[key: string]: string[]} = {
172
141
  'orange': ['rgb(255,187,120)', 'rgb(245,167,100)', 'rgb(235,137,70)', 'rgb(205, 111, 71)'],
173
142
  'all_green': ['rgb(44,160,44)', 'rgb(74,160,74)', 'rgb(23,103,57)', 'rgb(30,110,96)', 'rgb(60,131,95)',
@@ -191,13 +160,6 @@ export class ChemPalette {
191
160
  'white': ['rgb(230,230,230)'],
192
161
  };
193
162
 
194
- /**
195
- * Grok color scheme groups.
196
- *
197
- * @static
198
- * @type {{[key: string]: string[]}}
199
- * @memberof ChemPalette
200
- */
201
163
  static grokGroups: {[key: string]: string[]} = {
202
164
  'yellow': ['C', 'U'],
203
165
  'red': ['G', 'P'],
@@ -207,38 +169,9 @@ export class ChemPalette {
207
169
  'orange': ['S', 'T', 'N', 'Q'],
208
170
  };
209
171
 
210
- /**
211
- * Lesk color scheme groups.
212
- *
213
- * @static
214
- * @type {{[key: string]: string[]}}
215
- * @memberof ChemPalette
216
- */
217
- static leskGroups: {[key: string]: string[]} = {
218
- 'orange': ['G', 'A', 'S', 'T'],
219
- 'all_green': ['C', 'V', 'I', 'L', 'P', 'F', 'Y', 'M', 'W'],
220
- 'magenta': ['N', 'Q', 'H'],
221
- 'red': ['D', 'E'],
222
- 'all_blue': ['K', 'R'],
223
- };
224
-
225
- /**
226
- * Undefined color.
227
- *
228
- * @static
229
- * @memberof ChemPalette
230
- */
231
172
  static undefinedColor = 'rgb(100,100,100)';
232
173
 
233
- /**
234
- * Create palette.
235
- *
236
- * @param dt
237
- * @param simplified Is simplified.
238
- * @param grouping Is grouping enabled.
239
- * @returns
240
- */
241
- static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false) {
174
+ static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false): StringDictionary {
242
175
  const palette: { [key: string]: string } = {};
243
176
  const groups = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
244
177
  let currentGroup = 0;
@@ -251,13 +184,6 @@ export class ChemPalette {
251
184
  return palette;
252
185
  }
253
186
 
254
- /**
255
- * Amino acid residue names.
256
- *
257
- * @static
258
- * @type {StringDictionary}
259
- * @memberof ChemPalette
260
- */
261
187
  static AANames: StringDictionary = {
262
188
  'G': 'Glycine',
263
189
  'L': 'Leucine',
@@ -281,13 +207,6 @@ export class ChemPalette {
281
207
  'T': 'Threonine',
282
208
  };
283
209
 
284
- /**
285
- * Amino acid residue SMILES.
286
- *
287
- * @static
288
- * @type {StringDictionary}
289
- * @memberof ChemPalette
290
- */
291
210
  static AASmiles: StringDictionary = {
292
211
  'G': 'NCC(=O)O',
293
212
  'L': 'N[C@H](CC(C)C)C(=O)O',
@@ -311,13 +230,6 @@ export class ChemPalette {
311
230
  'T': 'NC(C(O)C)C(=O)O',
312
231
  };
313
232
 
314
- /**
315
- * Amino acid residue truncated SMILES.
316
- *
317
- * @static
318
- * @type {StringDictionary}
319
- * @memberof ChemPalette
320
- */
321
233
  static AASmilesTruncated: StringDictionary = {
322
234
  'G': '*C*',
323
235
  'L': 'CC(C)C[C@H](*)*',
@@ -341,13 +253,6 @@ export class ChemPalette {
341
253
  'T': 'CC(O)C(*)*',
342
254
  };
343
255
 
344
- /**
345
- * Amino acid residue full names.
346
- *
347
- * @static
348
- * @type {StringDictionary}
349
- * @memberof ChemPalette
350
- */
351
256
  static AAFullNames: StringDictionary = {
352
257
  'Ala': 'A',
353
258
  'Arg': 'R',
@@ -371,23 +276,16 @@ export class ChemPalette {
371
276
  'Val': 'V',
372
277
  };
373
278
 
374
- /**
375
- * Get Datagrok palette.
376
- *
377
- * @param grouping Is grouping enabled?
378
- * @returns
379
- */
380
- static getDatagrok(grouping = false) {
279
+ static getDatagrok(grouping = false): StringDictionary {
381
280
  return ChemPalette.makePalette(ChemPalette.grokGroups, false, grouping);
382
281
  }
383
282
 
384
- /**
385
- * Get Lesk palette.
386
- *
387
- * @param grouping Is grouping enabled?
388
- * @returns
389
- */
390
- static getLesk() {
391
- return ChemPalette.makePalette(ChemPalette.leskGroups);
283
+ static getPalette(scheme: 'grok'): StringDictionary {
284
+ switch (scheme) {
285
+ case 'grok':
286
+ return ChemPalette.getDatagrok();
287
+ default:
288
+ throw new Error(`ChemPalette: scheme \`${scheme}\` does not exist`);
289
+ }
392
290
  }
393
291
  }
@@ -0,0 +1,56 @@
1
+ export enum COLUMNS_NAMES {
2
+ SPLIT_COL = '~split',
3
+ ACTIVITY = '~activity',
4
+ ACTIVITY_SCALED = 'activity_scaled',
5
+ ALIGNED_SEQUENCE = '~aligned_sequence',
6
+ AMINO_ACID_RESIDUE = 'AAR',
7
+ POSITION = 'Pos',
8
+ P_VALUE = 'pValue',
9
+ MEAN_DIFFERENCE = 'Mean difference',
10
+ }
11
+
12
+ export enum CATEGORIES {
13
+ OTHER = 'Other',
14
+ ALL = 'All',
15
+ }
16
+
17
+ export enum TAGS {
18
+ AAR = 'AAR',
19
+ POSITION = 'Pos',
20
+ }
21
+
22
+ export enum SEM_TYPES {
23
+ AMINO_ACIDS = 'aminoAcids',
24
+ ALIGNED_SEQUENCE = 'alignedSequence',
25
+ }
26
+
27
+ export const STATS = 'stats';
28
+
29
+ export const EMBEDDING_STATUS = 'embeddingStatus';
30
+
31
+ export const PEPTIDES_ANALYSIS = 'isPeptidesAnalysis';
32
+
33
+ export enum FLAGS {
34
+ CELL_CHANGING = 'isCellChanging',
35
+ }
36
+
37
+ export const aarGroups = {
38
+ 'R': 'PC', 'H': 'PC', 'K': 'PC',
39
+ 'D': 'NC', 'E': 'NC',
40
+ 'S': 'U', 'T': 'U', 'N': 'U', 'Q': 'U',
41
+ 'C': 'SC', 'U': 'SC', 'G': 'SC', 'P': 'SC',
42
+ 'A': 'H', 'V': 'H', 'I': 'H', 'L': 'H', 'M': 'H', 'F': 'H', 'Y': 'H', 'W': 'H',
43
+ '-': '-',
44
+ };
45
+
46
+ export const groupDescription: {[key: string]: {'description': string, aminoAcids: string[]}} = {
47
+ 'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
48
+ 'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
49
+ 'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
50
+ 'SC': {'description': 'Special Cases', 'aminoAcids': ['C', 'U', 'G', 'P']},
51
+ 'H': {
52
+ 'description': 'Amino Acids with Hydrophobic Side Chain',
53
+ 'aminoAcids': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
54
+ },
55
+ '-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
56
+ };
@@ -0,0 +1,62 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import {tTest} from '@datagrok-libraries/statistics/src/tests';
4
+
5
+ /** Column statistics helper. */
6
+ export class FilteringStatistics {
7
+ private data?: Float32Array;
8
+ private stats: Stats = {
9
+ count: 0,
10
+ pValue: 1.,
11
+ meanDifference: 0.,
12
+ };
13
+
14
+ /**
15
+ * Creates an instance of FilteringStatistics.
16
+ * @param {Float32Array} [data] Numeric values to consider.
17
+ */
18
+ constructor(data?: Float32Array) {this.data = data;}
19
+
20
+ /**
21
+ * Sets the values on which the statistical analysis is performed.
22
+ * @param {Float32Array} data Numeric values to consider.
23
+ */
24
+ setData(data: Float32Array) {this.data = data;}
25
+
26
+ /**
27
+ * Sets bit mask to split population into two groups.
28
+ * @param {DG.BitSet} mask The mask to perform splitting.
29
+ */
30
+ setMask(mask: DG.BitSet) {
31
+ if (!this.data)
32
+ return;
33
+ const selected = this.data.filter((_, i) => mask.get(i));
34
+ const rest = this.data.filter((_, i) => !mask.get(i));
35
+ this.stats = this.calcStats(selected, rest);
36
+ }
37
+
38
+ /**
39
+ * Calculates simple statistics on two samples.
40
+ * @param {Float32Array} selected First sample.
41
+ * @param {Float32Array} rest Second sample.
42
+ * @return {Stats} Statistics.
43
+ */
44
+ calcStats(selected: Float32Array, rest: Float32Array): Stats {
45
+ const testResult = tTest(selected, rest);
46
+ const currentMeanDiff = testResult['Mean difference']!;
47
+ return {
48
+ count: selected.length,
49
+ pValue: testResult[currentMeanDiff >= 0 ? 'p-value more' : 'p-value less'],
50
+ meanDifference: currentMeanDiff,
51
+ };
52
+ }
53
+
54
+ /** Returns calculated statistics. */
55
+ get result(): Stats {return this.stats;}
56
+ }
57
+
58
+ export type Stats = {
59
+ count: number,
60
+ pValue: number,
61
+ meanDifference: number,
62
+ };
@@ -7,6 +7,8 @@ import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encod
7
7
  //@ts-ignore
8
8
  import {SEMTYPE} from '../semantics';
9
9
 
10
+ // let CLI: any = undefined;
11
+
10
12
  /**
11
13
  * Converts array of sequences into simple fasta string.
12
14
  *
@@ -73,16 +75,45 @@ export async function runKalign(col: DG.Column, isAligned = false) : Promise<DG.
73
75
  sequences = sequences.map((v: string, _) => AlignedSequenceEncoder.clean(v).replace(/\-/g, ''));
74
76
 
75
77
  const fasta = _stringsToFasta(sequences);
78
+ const CLI = await new Aioli({
79
+ tool: 'kalign',
80
+ version: '3.3.1',
81
+ reinit: true,
82
+ });
83
+
84
+ // if (!CLI) {
85
+ // CLI = await new Aioli('kalign/3.3.1');
86
+ // console.info('kalign CLI was first initialized.');
87
+ // } else
88
+ // console.info('Initialized kalign CLI was reused.');
89
+
90
+ console.log(['fasta.length =', fasta.length]);
76
91
 
77
- const CLI = await new Aioli('kalign/3.3.1');
78
92
  await CLI.fs.writeFile('input.fa', fasta);
79
- const output = await CLI.exec(`kalign input.fa -f fasta -o result.fasta`);
93
+ const output = await CLI.exec('kalign input.fa -f fasta -o result.fasta');
80
94
  const buf = await CLI.cat('result.fasta');
81
95
 
82
96
  console.warn(output);
83
97
 
98
+ // if (!buf)
99
+ // console.warn(buf);
100
+
84
101
  const aligned = _fastaToStrings(buf).slice(0, sequences.length);
85
102
  const alignedCol = DG.Column.fromStrings(`(${col.name})msa`, _stringsToAligned(aligned));
86
103
  alignedCol.semType = SEMTYPE.ALIGNED;
87
104
  return alignedCol;
88
105
  }
106
+
107
+ export async function testMSAEnoughMemory(col: DG.Column) {
108
+ const sequencesCount = col.length;
109
+ const delta = sequencesCount/100;
110
+
111
+ for (let i = delta; i < sequencesCount; i += delta) {
112
+ try {
113
+ await runKalign(DG.Column.fromStrings(col.name, col.toList().slice(0, Math.round(i))));
114
+ console.log(`runKalign succeeded on ${i}`);
115
+ } catch (error) {
116
+ console.log(`runKalign failed on ${i} with '${error}'`);
117
+ }
118
+ }
119
+ }
@@ -0,0 +1,79 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+
4
+ import * as C from './constants';
5
+
6
+ import {AlignedSequenceEncoder} from '@datagrok-libraries/bio/src/sequence-encoder';
7
+
8
+ export async function callMVA(
9
+ tableGrid: DG.Grid,
10
+ view: DG.View,
11
+ currentDf: DG.DataFrame,
12
+ options: {[name: string]: string},
13
+ sequencesCol: DG.Column,
14
+ ) {
15
+ const activityCol = await _scaleColumn(currentDf.getCol(options['activityColumnName']), options['scaling']);
16
+ const encDf = _encodeSequences(sequencesCol);
17
+ // const scaledColName = `${options['activityColumnName']}scaled`;
18
+
19
+ _insertColumns(
20
+ currentDf,
21
+ [DG.Column.fromList('double', C.COLUMNS_NAMES.ACTIVITY_SCALED, activityCol.toList())],
22
+ );
23
+ _insertColumns(currentDf, encDf.columns);
24
+
25
+ const res = await grok.functions.call('MultivariateAnalysis', {
26
+ table: currentDf,
27
+ features: encDf.columns.names(),
28
+ prediction: C.COLUMNS_NAMES.ACTIVITY_SCALED,
29
+ components: 10,
30
+ showScores: true,
31
+ showRegresCoefs: true,
32
+ });
33
+ console.log(res);
34
+ }
35
+
36
+ /**
37
+ * Encodes a series of sequences into a certain scale.
38
+ *
39
+ * @param {DG.Column} sequencesCol Column containing the sequences.
40
+ * @return {DG.DataFrame} The data frame with sequences encoded.
41
+ */
42
+ function _encodeSequences(sequencesCol: DG.Column): DG.DataFrame {
43
+ const nRows = sequencesCol.length;
44
+ const nCols = AlignedSequenceEncoder.clean(sequencesCol.get(0)).length;
45
+ const enc = new AlignedSequenceEncoder('WimleyWhite');
46
+ const positions = new Array(nCols).fill(0).map((_) => new Float32Array(nRows));
47
+
48
+ for (let j = 0; j < nRows; ++j) {
49
+ const s = AlignedSequenceEncoder.clean(sequencesCol.get(j));
50
+ for (let i = 0; i < nCols; ++i)
51
+ positions[i][j] = enc.encodeLettter(s[i]);
52
+ }
53
+ const df = DG.DataFrame.fromColumns(positions.map(
54
+ (v, i) => DG.Column.fromFloat32Array((i+1).toString(), v),
55
+ ));
56
+ return df;
57
+ }
58
+
59
+ async function _scaleColumn(column: DG.Column, method: string): Promise<DG.Column> {
60
+ if (method == 'none')
61
+ return column;
62
+
63
+
64
+ const formula = (method.startsWith('-') ? '0-' : '')+'Log10(${'+column.name+'})';
65
+ const newCol = await column.applyFormula(formula);
66
+
67
+ if (newCol == null)
68
+ throw new Error('Column formula returned unexpected null.');
69
+
70
+ return newCol!;
71
+ }
72
+
73
+ function _insertColumns(targetDf: DG.DataFrame, columns: DG.Column[]): DG.DataFrame {
74
+ for (const col of columns)
75
+ targetDf.columns.add(col);
76
+
77
+ return targetDf;
78
+ }
79
+