@datagrok/bio 1.5.7 → 1.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/css/helm.css +3 -0
  2. package/detectors.js +9 -10
  3. package/dist/package-test.js +1095 -416
  4. package/dist/package.js +898 -250
  5. package/files/samples/sample_FASTA.csv +66 -66
  6. package/helm/JSDraw/Pistoia.HELM-uncompressed.js +9694 -0
  7. package/helm/JSDraw/Pistoia.HELM.js +27 -0
  8. package/helm/JSDraw/ReadMe.txt +8 -0
  9. package/helm/JSDraw/Scilligence.JSDraw2.Lite-uncompressed.js +31126 -0
  10. package/helm/JSDraw/Scilligence.JSDraw2.Lite.js +12 -0
  11. package/helm/JSDraw/Scilligence.JSDraw2.Resources.js +762 -0
  12. package/helm/JSDraw/dojo.js +250 -0
  13. package/helm/JSDraw/test.html +21 -0
  14. package/package.json +8 -1
  15. package/src/monomer-library.ts +199 -0
  16. package/src/package-test.ts +2 -0
  17. package/src/package.ts +41 -13
  18. package/src/tests/convert-test.ts +143 -22
  19. package/src/tests/detectors-test.ts +97 -156
  20. package/src/tests/renderer-test.ts +36 -0
  21. package/src/tests/splitter-test.ts +22 -0
  22. package/src/tests/types.ts +7 -0
  23. package/src/utils/atomic-works.ts +218 -97
  24. package/src/utils/cell-renderer.ts +214 -0
  25. package/src/utils/chem-palette.ts +280 -0
  26. package/src/utils/convert.ts +25 -16
  27. package/src/utils/misc.ts +29 -0
  28. package/src/utils/multiple-sequence-alignment.ts +1 -1
  29. package/src/utils/notation-converter.ts +120 -84
  30. package/src/utils/sequence-activity-cliffs.ts +2 -2
  31. package/src/utils/types.ts +13 -0
  32. package/src/utils/utils.ts +35 -30
  33. package/test-Bio-34f75e5127b8-c4c5a3dc.html +259 -0
  34. package/files/sample_FASTA.csv +0 -66
  35. package/files/sample_FASTA_with_activities.csv +0 -66
  36. package/files/sample_MSA.csv +0 -541
@@ -0,0 +1,280 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
6
+ import {MonomerLibrary} from '../monomer-library';
7
+
8
+
9
/**
 * Colour palette and naming tables for amino acid residues (monomers).
 *
 * All functionality is exposed through static members: palette construction
 * ({@link ChemPalette.makePalette}, {@link ChemPalette.getPalette}), monomer
 * parsing ({@link ChemPalette.getInnerOuter}, {@link ChemPalette.getColorAAPivot})
 * and tooltip rendering ({@link ChemPalette.showTooltip}).
 */
export class ChemPalette {
  cp: StringDictionary = {};
  isInit: boolean = false;
  monomerLib: MonomerLibrary | null = null;

  /**
   * Creates an instance of ChemPalette.
   * Only the 'grok' scheme populates `cp`; any other scheme leaves it empty.
   *
   * @param {string} scheme Color scheme to use.
   * @param {boolean} [grouping=false] Is grouping enabled.
   * @memberof ChemPalette
   */
  private constructor(scheme: string, grouping = false) {
    if (scheme == 'grok')
      this.cp = ChemPalette.getDatagrok(grouping);
  }

  /**
   * Renders a 2D representation of an amino acid residue in a tooltip.
   *
   * The cell value is split with {@link ChemPalette.getColorAAPivot}; for the outer and the inner part
   * that are listed in `monomerLib.monomerNames`, a molecule sketch is appended to the tooltip content.
   *
   * @param {DG.GridCell} cell Grid cell to show tooltip over.
   * @param {number} x x coordinate of the mouse pointer.
   * @param {number} y y coordinate of the mouse pointer.
   * @param {MonomerLibrary} monomerLib Monomer Library instance
   */
  static showTooltip(cell: DG.GridCell, x: number, y: number, monomerLib: MonomerLibrary): void {
    const s = cell.cell.value as string;
    let toDisplay = [ui.divText(s)];
    const [, aarOuter, aarInner] = ChemPalette.getColorAAPivot(s);
    for (const aar of [aarOuter, aarInner]) {
      if (monomerLib.monomerNames.includes(aar)) {
        // A known one-letter code replaces the raw text with the residue's full name.
        if (aar in ChemPalette.AANames)
          toDisplay = [ui.divText(ChemPalette.AANames[aar])];

        // A known three-letter code is first mapped to its one-letter code.
        if (aar in ChemPalette.AAFullNames)
          toDisplay = [ui.divText(ChemPalette.AANames[ChemPalette.AAFullNames[aar]])];

        const options = {
          autoCrop: true,
          autoCropMargin: 0,
          suppressChiralText: true,
        };
        const sketch = grok.chem.svgMol(monomerLib.getMonomerMol(aar), undefined, undefined, options);
        // Two items are already present only when a previous part has contributed its sketch.
        if (toDisplay.length == 2)
          toDisplay.push(ui.divText('Modified'));

        toDisplay.push(sketch);
      }
    }
    ui.tooltip.show(ui.divV(toDisplay), x, y);
  }

  /**
   * Returns the monomer split into the text outside the parentheses and the text inside them.
   * If `parseInt` reads a number from the inner text, the inner text is omitted.
   *
   * @param {string} c raw amino
   * @return {[string, string]} outer and inner content
   */
  static getInnerOuter(c: string): [string, string] {
    let isInner = 0; // current parenthesis nesting depth
    let inner = '';
    let outer = '';

    for (const char of c) {
      if (char == '(')
        isInner++;
      else if (char == ')')
        isInner--;
      else if (isInner)
        inner += char;
      else
        outer += char;
    }

    return !isNaN(parseInt(inner)) ? [outer, ''] : [outer, inner];
  }

  /**
   * Resolves the colour and the display parts of a monomer.
   *
   * Recognised forms, each optionally followed by a parenthesised part:
   * a one-letter code (`A`), a `d`-prefixed one-letter code (`dA`), a three-letter code (`Ala`)
   * and a three-letter code prefixed by one character that equals its own lower-case form (`dAla`).
   *
   * @param {string} [monomer=''] raw monomer text
   * @param {'grok'} [scheme='grok'] colour scheme name
   * @return {[string, string, string, number]} colour, outer part (the one-letter code when it is
   * found in the palette), inner part, and the length of the recognised code (0 when unrecognised).
   * Outer and inner parts longer than 6 characters are shortened to 3 characters plus '...'.
   */
  static getColorAAPivot(monomer: string = '', scheme: 'grok' = 'grok'): [string, string, string, number] {
    const chemPaletteInstance = ChemPalette.getPalette(scheme);
    let [outerMonomer, innerMonomer] = ChemPalette.getInnerOuter(monomer);
    outerMonomer = (outerMonomer.length > 6 ? `${outerMonomer.slice(0, 3)}...` : outerMonomer);
    innerMonomer = (innerMonomer.length > 6 ? `${innerMonomer.slice(0, 3)}...` : innerMonomer);

    // True when the code of `codeLength` characters is the whole monomer or is followed by '('.
    const codeEndsAt = (codeLength: number): boolean =>
      monomer.length == codeLength || monomer[codeLength] == '(';

    // Builds the result for a recognised code of `codeLength` characters.
    const pivot = (amino: string, codeLength: number): [string, string, string, number] =>
      amino in chemPaletteInstance ?
        [chemPaletteInstance[amino], amino, innerMonomer, codeLength] :
        [ChemPalette.undefinedColor, outerMonomer, innerMonomer, codeLength];

    if (codeEndsAt(1))
      return pivot(monomer[0]!.toUpperCase(), 1);

    if (monomer[0] == 'd' && monomer[1]! in chemPaletteInstance && codeEndsAt(2))
      return pivot(monomer[1]!.toUpperCase(), 2);

    const threeLetter = monomer.substring(0, 3);
    if (threeLetter in ChemPalette.AAFullNames && codeEndsAt(3))
      return pivot(ChemPalette.AAFullNames[threeLetter], 3);

    if (monomer[0]?.toLowerCase() == monomer[0]) {
      // The three-letter code follows the one-character prefix, i.e. occupies positions 1..3.
      const prefixedThreeLetter = monomer.substring(1, 4);
      if (prefixedThreeLetter in ChemPalette.AAFullNames && codeEndsAt(4))
        return pivot(ChemPalette.AAFullNames[prefixedThreeLetter], 4);
    }

    return [ChemPalette.undefinedColor, outerMonomer, innerMonomer, 0];
  }

  /** Named colour families; each entry lists the shades available in that family. */
  static colourPalette: {[key: string]: string[]} = {
    'orange': ['rgb(255,187,120)', 'rgb(245,167,100)', 'rgb(235,137,70)', 'rgb(205, 111, 71)'],
    'all_green': ['rgb(44,160,44)', 'rgb(74,160,74)', 'rgb(23,103,57)', 'rgb(30,110,96)', 'rgb(60,131,95)',
      'rgb(24,110,79)', 'rgb(152,223,138)', 'rgb(182, 223, 138)', 'rgb(152, 193, 138)'],
    'all_blue': ['rgb(31,119,180)', 'rgb(23,190,207)', 'rgb(122, 102, 189)', 'rgb(158,218,229)', 'rgb(141, 124, 217)',
      'rgb(31, 120, 150)'],
    'magenta': ['rgb(162,106,192)', 'rgb(197,165,224)', 'rgb(208,113,218)'],
    'red': ['rgb(214,39,40)', 'rgb(255,152,150)'],
    'st_blue': ['rgb(23,190,207)', 'rgb(158,218,229)', 'rgb(31,119,180)'],
    'dark_blue': ['rgb(31,119,180)', 'rgb(31, 120, 150)'],
    'light_blue': ['rgb(23,190,207)', 'rgb(158,218,229)', 'rgb(108, 218, 229)', 'rgb(23,190,227)'],
    'lilac_blue': ['rgb(124,102,211)', 'rgb(149,134,217)', 'rgb(97, 81, 150)'],
    'dark_green': ['rgb(23,103,57)', 'rgb(30,110,96)', 'rgb(60,131,95)', 'rgb(24,110,79)'],
    'green': ['rgb(44,160,44)', 'rgb(74,160,74)'],
    'light_green': ['rgb(152,223,138)', 'rgb(182, 223, 138)', 'rgb(152, 193, 138)'],
    'st_green': ['rgb(44,160,44)', 'rgb(152,223,138)', 'rgb(39, 174, 96)', 'rgb(74,160,74)'],
    'pink': ['rgb(247,182,210)'],
    'brown': ['rgb(140,86,75)', 'rgb(102, 62, 54)'],
    'gray': ['rgb(127,127,127)', 'rgb(199,199,199)', 'rgb(196,156,148)', 'rgb(222, 222, 180)'],
    'yellow': ['rgb(188,189,34)'],
    'white': ['rgb(230,230,230)'],
  };

  /** The 'grok' scheme: colour family name -> one-letter monomer codes drawn in that family. */
  static grokGroups: {[key: string]: string[]} = {
    'yellow': ['C', 'U'],
    'red': ['G', 'P'],
    'all_green': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
    'light_blue': ['R', 'H', 'K'],
    'dark_blue': ['D', 'E'],
    'orange': ['S', 'T', 'N', 'Q'],
  };

  /** Colour returned for monomers that are not found in the palette. */
  static undefinedColor = 'rgb(100,100,100)';

  /**
   * Builds a key -> colour dictionary from a colour-family -> monomers table.
   *
   * @param {Object} dt colour family name (a key of `colourPalette`) -> monomers of that family
   * @param {boolean} [simplified=false] use the first shade of the family for every monomer
   * @param {boolean} [grouping=false] key the palette by group letter ('A' for the first family,
   * 'B' for the second, ...) instead of by monomer
   * @return {StringDictionary} the palette
   * @throws {Error} when a family name is missing from `colourPalette`
   */
  static makePalette(dt: {[key: string]: string[]}, simplified = false, grouping = false): StringDictionary {
    const palette: { [key: string]: string } = {};
    const groups = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
    let currentGroup = 0;
    for (const [color, monomers] of Object.entries(dt)) {
      const shades = ChemPalette.colourPalette[color];
      if (shades === undefined || shades.length == 0)
        throw new Error(`ChemPalette: colour \`${color}\` does not exist`);

      monomers.forEach((monomer, index) => {
        // Wrap around so a family with fewer shades than monomers never yields `undefined`.
        const shade = shades[simplified ? 0 : index % shades.length];
        palette[grouping ? groups[currentGroup] : monomer] = shade;
      });
      currentGroup++;
    }
    return palette;
  }

  /** One-letter code -> amino acid name. */
  static AANames: StringDictionary = {
    'G': 'Glycine',
    'L': 'Leucine',
    'Y': 'Tyrosine',
    'S': 'Serine',
    'E': 'Glutamic acid',
    'Q': 'Glutamine',
    'D': 'Aspartic acid',
    'N': 'Asparagine',
    'F': 'Phenylalanine',
    'A': 'Alanine',
    'K': 'Lysine',
    'R': 'Arginine',
    'H': 'Histidine',
    'C': 'Cysteine',
    'V': 'Valine',
    'P': 'Proline',
    'W': 'Tryptophan',
    'I': 'Isoleucine',
    'M': 'Methionine',
    'T': 'Threonine',
  };

  /** One-letter code -> SMILES string. */
  static AASmiles: StringDictionary = {
    'G': 'NCC(=O)O',
    'L': 'N[C@H](CC(C)C)C(=O)O',
    'Y': 'NC(CC1=CC=C(O)C=C1)C(=O)O',
    'S': 'NC(CO)C(=O)O',
    'E': 'N[C@@H](CCC(O)=O)C(=O)O',
    'Q': 'N[C@@H](CCC(N)=O)C(=O)O',
    'D': 'N[C@@H](CC(O)=O)C(=O)O',
    'N': 'N[C@@H](CC(N)=O)C(=O)O',
    'F': 'NC(CC1=CC=CC=C1)C(=O)O',
    'A': 'N[C@H](C)C(=O)O',
    'K': 'NC(CCCCN)C(=O)O',
    'R': 'N[C@H](CCCNC(=N)C)C(=O)O',
    'H': 'NC(CC1=CN=C[N]1)C(=O)O',
    'C': 'N[C@@H](CS)C(=O)O',
    'V': 'NC(C(C)C)C(=O)O',
    'P': 'N(CCC1)C1C(=O)O',
    'W': 'N[C@@H](Cc1c2ccccc2n([H])c1)C(=O)O',
    'I': 'N[C@H]([C@H](C)CC)C(=O)O',
    'M': 'NC(CCSC)C(=O)O',
    'T': 'NC(C(O)C)C(=O)O',
  };

  /** One-letter code -> SMILES string with `*` placeholder atoms. */
  static AASmilesTruncated: StringDictionary = {
    'G': '*C*',
    'L': 'CC(C)C[C@H](*)*',
    'Y': 'C1=CC(=CC=C1CC(*)*)O',
    'S': 'OCC(*)C*',
    'E': '*[C@@H](CCC(O)=O)*',
    'Q': '*N[C@@H](CCC(N)=O)*',
    'D': '*[C@@H](CC(O)=O)*',
    'N': '*[C@@H](CC(N)=O)*',
    'F': 'C1=CC=C(C=C1)CC(*)*',
    'A': 'C[C@H](*)*',
    'K': 'C(CCN)CC(*)*',
    'R': '*[C@H](CCCNC(=N)C)*',
    'H': 'C1=C(NC=N1)CC(*)*',
    'C': 'C([C@@H](*)*)S',
    'V': 'CC(C)C(*)*',
    'P': 'C1CCN(*)C1*',
    'W': '*[C@@H](Cc1c2ccccc2n([H])c1)*',
    'I': 'CC[C@H](C)[C@H](*)*',
    'M': 'CSCCC(*)*',
    'T': 'CC(O)C(*)*',
  };

  /** Three-letter code -> one-letter code. */
  static AAFullNames: StringDictionary = {
    'Ala': 'A',
    'Arg': 'R',
    'Asn': 'N',
    'Asp': 'D',
    'Cys': 'C',
    'Gln': 'Q',
    'Glu': 'E',
    'Gly': 'G',
    'His': 'H',
    'Ile': 'I',
    'Leu': 'L',
    'Lys': 'K',
    'Met': 'M',
    'Phe': 'F',
    'Pro': 'P',
    'Ser': 'S',
    'Thr': 'T',
    'Trp': 'W',
    'Tyr': 'Y',
    'Val': 'V',
  };

  /**
   * Builds the 'grok' palette from `grokGroups`.
   *
   * @param {boolean} [grouping=false] key the palette by group letter instead of by monomer
   * @return {StringDictionary} the palette
   */
  static getDatagrok(grouping = false): StringDictionary {
    return ChemPalette.makePalette(ChemPalette.grokGroups, false, grouping);
  }

  /**
   * Returns the palette of the given scheme.
   *
   * @param {'grok'} scheme colour scheme name
   * @return {StringDictionary} monomer -> colour
   * @throws {Error} when the scheme is not 'grok'
   */
  static getPalette(scheme: 'grok'): StringDictionary {
    if (scheme == 'grok')
      return ChemPalette.getDatagrok();
    else
      throw new Error(`ChemPalette: scheme \`${scheme}\` does not exist`);
  }
}
@@ -1,6 +1,6 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as ui from 'datagrok-api/ui';
3
- import {NotationConverter} from './notation-converter';
3
+ import {NotationConverter, NOTATION} from './notation-converter';
4
4
 
5
5
  /**
6
6
  * Converts notations of a Macromolecule column
@@ -8,27 +8,36 @@ import {NotationConverter} from './notation-converter';
8
8
  * @param {DG.column} col Column with 'Macromolecule' semantic type
9
9
  */
10
10
  export function convert(col: DG.Column): void {
11
- const current = col.tags[DG.TAGS.UNITS];
11
+ const converter = new NotationConverter(col);
12
+ const current: NOTATION = converter.sourceNotation;
12
13
  //TODO: read all notations
13
- const units = [
14
- 'fasta',
15
- 'separator',
16
- 'HELM'
14
+ const notations = [
15
+ NOTATION.FASTA,
16
+ NOTATION.SEPARATOR,
17
+ NOTATION.HELM
17
18
  ];
18
- const choices = ui.choiceInput('convert to', '', units.filter((e) => e !== current));
19
+ const filtered = notations.filter((e) => e !== current);
20
+ const targetNotationInput = ui.choiceInput('Convert to', filtered[0], filtered);
21
+
22
+ const separatorInput = ui.choiceInput('separator', '-', ['-', '.', '/']);
19
23
 
20
24
  ui.dialog('Convert sequence')
21
- .add(
22
- ui.div([
23
- ui.h1('current notation'),
24
- ui.div(current),
25
- choices.root
26
- ])
27
- )
25
+ .add(ui.div([
26
+ ui.h1('current notation'),
27
+ ui.div(current),
28
+ targetNotationInput.root
29
+ ]))
30
+ .add(ui.div([
31
+ ui.h1('Separator'),
32
+ separatorInput,
33
+
34
+ ]))
28
35
  .onOK(() => {
29
36
  //TODO: create new converted column
30
- const converter = new NotationConverter(col, choices.value!);
31
- const newColumn = converter.convert();
37
+ //const targetNotation: NOTATION = strToEnum<NOTATION>(NOTATION, targetNotationInput.value)!;
38
+ const targetNotation: NOTATION = targetNotationInput.value as NOTATION;
39
+ const separator = separatorInput.value!;
40
+ const newColumn = converter.convert(targetNotation, separator);
32
41
  col.dataFrame.columns.add(newColumn);
33
42
  })
34
43
  .show();
@@ -0,0 +1,29 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
+ import * as C from './constants';
4
+
5
/**
 * Converts a string flag to a boolean.
 *
 * @param {string} str text to interpret
 * @return {boolean} `true` only for the exact string 'true'; `false` for anything else
 */
export function stringToBool(str: string): boolean {
  return str === 'true';
}
8
+
9
/**
 * Returns the monomer separator of a sequence column.
 *
 * The separator tag of the column wins when it is set. Otherwise the first of '.', '-', ' '
 * that occurs in every category of the column is returned. A column without categories
 * therefore yields '.', the first candidate.
 *
 * @param {DG.Column<string>} col column to inspect
 * @return {string} the separator, or an empty string when none could be determined
 */
export function getSeparator(col: DG.Column<string>): string {
  const separator = col.tags[C.TAGS.SEPARATOR];
  if (separator)
    return separator as string;

  const defaultSeparators = ['.', '-', ' '];
  const categories = col.categories;
  for (const potentialSeparator of defaultSeparators) {
    // `every` stops at the first category that lacks the candidate.
    if (categories.every((sequence) => sequence.includes(potentialSeparator)))
      return potentialSeparator;
  }
  return '';
}
23
+
24
/**
 * Picks the narrowest unsigned typed-array constructor for values bounded by `maxNum`.
 *
 * @param {number} maxNum bound that the stored values are compared against
 * @return {Uint8ArrayConstructor | Uint16ArrayConstructor | Uint32ArrayConstructor}
 * `Uint8Array` when `maxNum` is below 256, `Uint16Array` when it is below 65536, `Uint32Array` otherwise
 */
export function getTypedArrayConstructor(
  maxNum: number): Uint8ArrayConstructor | Uint16ArrayConstructor | Uint32ArrayConstructor {
  if (maxNum < 256)
    return Uint8Array;
  if (maxNum < 65536)
    return Uint16Array;
  return Uint32Array;
}
@@ -62,7 +62,7 @@ export async function runKalign(srcCol: DG.Column, isAligned = false): Promise<D
62
62
  const tgtUnits = srcUnits.split(':').map((p, i) => i == 1 ? p + '.MSA' : p).join(':');
63
63
 
64
64
  tgtCol.setTag(DG.TAGS.UNITS, tgtUnits);
65
- tgtCol.semType = C.SEM_TYPES.Macro_Molecule;
65
+ tgtCol.semType = DG.SEMTYPE.MACROMOLECULE;
66
66
  return tgtCol;
67
67
  }
68
68
 
@@ -1,131 +1,167 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
+ import {WebLogo} from '@datagrok-libraries/bio/src/viewers/web-logo';
2
3
 
3
- // export const enum NOTATION {
4
- // // these values can be changed to "user-friendly" ones later on
5
- // FASTA = 'fasta',
6
- // SEPARATOR = 'separator',
7
- // HELM = 'helm'
8
- // }
4
+ /** enum type to simplify setting "user-friendly" notation if necessary */
5
+ export const enum NOTATION {
6
+ FASTA = 'fasta',
7
+ SEPARATOR = 'separator',
8
+ HELM = 'helm'
9
+ }
9
10
 
11
/**
 * Class for handling conversion of notation systems in Macromolecule columns.
 *
 * The source notation is derived once, in the constructor, from the units tag of the column;
 * {@link NotationConverter.convert} then produces a new column in the requested notation.
 */
export class NotationConverter {
  private _sourceColumn: DG.Column; // the column to be converted
  private _sourceUnits: string; // units, of the form fasta:SEQ:NT, etc.
  private _sourceNotation: NOTATION; // current notation (without :SEQ:NT, etc.)

  /**
   * @param {DG.Column} col column to be converted
   * @throws {Error} when the column has no units tag
   */
  public constructor(col: DG.Column) {
    this._sourceColumn = col;
    const units = this._sourceColumn.tags[DG.TAGS.UNITS];
    // Without units every later step would fail on `undefined.toLowerCase()`; fail early and clearly.
    if (units == null)
      throw new Error(`Column '${col.name}' has no units tag, its notation cannot be determined`);
    this._sourceUnits = units;
    this._sourceNotation = this.determineSourceNotation();
  }

  private get sourceUnits(): string { return this._sourceUnits; }

  private get sourceColumn(): DG.Column { return this._sourceColumn; }

  public get sourceNotation(): NOTATION { return this._sourceNotation; }

  public isFasta(): boolean { return this.sourceNotation === NOTATION.FASTA; }

  public isSeparator(): boolean { return this.sourceNotation === NOTATION.SEPARATOR; }

  public isHelm(): boolean { return this.sourceNotation === NOTATION.HELM; }

  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }

  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }

  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }

  // TODO: isRna
  /** @return {boolean} true when the units end with 'nt' (case-insensitive) */
  public isRna(): boolean { return this.sourceUnits.toLowerCase().endsWith('nt'); }

  /** @return {boolean} true when the units end with 'pt' (case-insensitive) */
  public isPeptide(): boolean { return this.sourceUnits.toLowerCase().endsWith('pt'); }

  /**
   * Associates the units of the source column with a notation type.
   * The match is a case-insensitive prefix test; units that start with neither 'fasta'
   * nor 'separator' are treated as HELM.
   *
   * @return {NOTATION} notation associated with the units type
   */
  private determineSourceNotation(): NOTATION {
    const units = this.sourceUnits.toLowerCase();
    if (units.startsWith('fasta'))
      return NOTATION.FASTA;
    else if (units.startsWith('separator'))
      return NOTATION.SEPARATOR;
    else
      // TODO: handle possible exceptions
      return NOTATION.HELM;
  }

  /**
   * Creates the (still empty) target column: one empty string per source row, an unused name
   * of the form `<targetNotation>(<source name>)`, the Macromolecule semantic type, and the
   * source units with the notation prefix swapped for the target notation.
   *
   * @param {NOTATION} targetNotation notation of the column being created
   * @return {DG.Column} the new column, not yet added to any data frame
   */
  private getNewColumn(targetNotation: NOTATION): DG.Column {
    const col = this.sourceColumn;
    const len = col.length;
    const name = targetNotation + '(' + col.name + ')';
    const newColName = col.dataFrame.columns.getUnusedName(name);
    // dummy code
    const newColumn = DG.Column.fromList('string', newColName, new Array(len).fill(''));
    newColumn.semType = DG.SEMTYPE.MACROMOLECULE;
    // The notation is detected case-insensitively, so it has to be replaced the same way;
    // a case-sensitive replace would leave e.g. 'FASTA:SEQ:NT' untouched.
    const sourcePrefix = new RegExp('^' + this.sourceNotation, 'i');
    const newUnits = this.sourceUnits.replace(sourcePrefix, targetNotation);
    newColumn.setTag(DG.TAGS.UNITS, newUnits);
    // TODO: determine all the qualifiers (units, ...), perhaps, using detectors
    return newColumn;
  }

  /**
   * Converts fasta sequences to separator notation: every sequence is split into monomers,
   * gap symbols become empty strings, and the monomers are joined by the separator.
   *
   * @param {string} separator text placed between adjacent monomers
   * @return {DG.Column} the converted column
   */
  private convertFastaToSeparator(separator: string): DG.Column {
    // TODO: implementation
    // * specify separator
    // * fasta gap symbol should NOT be considered '-' only, but set as a parameter
    // * in fasta every position is a monomer (no multi-char monomers), call splitToMonomers() method
    // * in the resulting jagged array, every gap symbol is to be replaced by
    // the empty string, while the monomers, to be separated by the separator
    // (specified as a parameter)
    // On splitToMonomers(): /libraries/bio/src/viewers/WebLogo --> getSplitter

    const gapSymbol = '-'; // to be specified as an argument
    const splitterAsFasta = WebLogo.splitterAsFasta;
    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);
    newColumn.init((idx: number) => {
      const sourcePolymer = this.sourceColumn.get(idx);
      const monomersArray: string[] = splitterAsFasta(sourcePolymer);
      return monomersArray
        .map((monomer) => monomer === gapSymbol ? '' : monomer)
        .join(separator);
    });
    return newColumn;
  }

  /**
   * Placeholder for wrapping an RNA nucleotide into HELM syntax.
   * TODO: implementation
   *
   * @param {string} monomer nucleotide to wrap (currently unused)
   */
  private wrapRnaNucleotideToHelm(monomer: string) {

  }

  /**
   * Converts fasta sequences to HELM.
   * Currently a stub: the monomers are split and joined back unchanged.
   *
   * @return {DG.Column} the converted column
   */
  private convertFastaToHelm(): DG.Column {
    const splitterAsFasta = WebLogo.splitterAsFasta;
    const newColumn = this.getNewColumn(NOTATION.HELM);
    newColumn.init((idx: number) => {
      const sourcePolymer = this.sourceColumn.get(idx);
      const monomersArray: string[] = splitterAsFasta(sourcePolymer);
      // TODO: handle gap symbols ('-', to be specified as an argument) -- replace by asterisk
      // TODO: determine conditionally (if isDna(), or isRna(), or isPeptide()) the template
      return monomersArray.join('');
    });
    return newColumn;
  }

  /**
   * Converts separator notation to fasta. Currently a stub returning an empty column.
   *
   * @return {DG.Column} the converted column
   */
  private convertSeparatorToFasta(): DG.Column {
    // TODO: implementation
    // * similarly to fasta2separator, divide string into monomers
    // * adjacent separators is a gap (symbol to be specified)
    // * the monomers MUST be single-character ones, otherwise forbid
    // conversion
    //getSplitterWithSeparator
    return this.getNewColumn(NOTATION.FASTA);
  }

  /**
   * Converts separator notation to HELM. Currently a stub returning an empty column.
   *
   * @return {DG.Column} the converted column
   */
  private convertSeparatorToHelm(): DG.Column {
    // TODO: implementation
    return this.getNewColumn(NOTATION.HELM);
  }

  /**
   * Converts HELM to fasta. Currently a stub returning an empty column.
   *
   * @return {DG.Column} the converted column
   */
  private convertHelmToFasta(): DG.Column {
    // TODO: implementation
    return this.getNewColumn(NOTATION.FASTA);
  }

  /**
   * Converts HELM to separator notation. Currently a stub returning an empty column.
   *
   * @return {DG.Column} the converted column
   */
  private convertHelmToSeparator(): DG.Column {
    // TODO: implementation
    return this.getNewColumn(NOTATION.SEPARATOR);
  }

  /**
   * Dispatcher method for notation conversion.
   *
   * @param {NOTATION} targetNotation notation to convert to; must differ from the source notation
   * @param {string | null} separator monomer separator; required only for fasta -> separator
   * @return {DG.Column} a new column holding the converted sequences
   * @throws {Error} when the target equals the source notation, or when a fasta -> separator
   * conversion is requested without a separator
   */
  // TODO: write the bodies of converter methods
  public convert(targetNotation: NOTATION, separator: string | null): DG.Column {
    if (this.sourceNotation === targetNotation)
      throw new Error(`Target notation must differ from the source notation ('${this.sourceNotation}')`);
    if (this.isFasta() && this.toSeparator(targetNotation)) {
      // there is the only place where a separator is needed
      if (separator == null)
        throw new Error('Separator is required to convert fasta to separator notation');
      return this.convertFastaToSeparator(separator);
    } else if (this.isFasta() && this.toHelm(targetNotation))
      return this.convertFastaToHelm();
    else if (this.isSeparator() && this.toFasta(targetNotation))
      return this.convertSeparatorToFasta();
    else if (this.isSeparator() && this.toHelm(targetNotation))
      return this.convertSeparatorToHelm();
    else if (this.isHelm() && this.toFasta(targetNotation))
      return this.convertHelmToFasta();
    else
      return this.convertHelmToSeparator();
  }
}
@@ -2,9 +2,9 @@ import {IDrawTooltipParams} from '@datagrok-libraries/ml/src/viewers/activity-cl
2
2
  import * as DG from 'datagrok-api/dg';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import {getSimilarityFromDistance} from '@datagrok-libraries/utils/src/similarity-metrics';
5
- import { AvailableMetrics } from '@datagrok-libraries/ml/src/typed-metrics';
5
+ import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
6
6
 
7
- export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null>{
7
+ export async function sequenceGetSimilarities(col: DG.Column, seq: string): Promise<DG.Column | null> {
8
8
  const stringArray = col.toList();
9
9
  const distances = new Array(stringArray.length).fill(0.0);
10
10
  for (let i = 0; i < stringArray.length; ++i)
@@ -0,0 +1,13 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+
3
/** Dictionary of data frames keyed by string. */
export type DataFrameDict = {[key: string]: DG.DataFrame};

export namespace BarChart {
  /** A column name paired with an amino acid name. */
  export type BarPart = {colName : string, aaName : string};
  /** A name with its total count and its selected count. */
  export type BarStatsObject = {name: string, count: number, selectedCount: number};
}

/** Any of the unsigned integer typed arrays. */
export type UTypedArray = Uint8Array | Uint16Array | Uint32Array;
//AAR: (Position: (index: indexList))
export type SubstitutionsInfo = Map<string, Map<string, Map<number, number[] | UTypedArray>>>;
/** Lists of strings keyed by position (the position is stored as a string key). */
export type SelectionObject = {[position: string]: string[]};