@datagrok/bio 2.1.11 → 2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -12
- package/css/helm.css +10 -0
- package/detectors.js +83 -59
- package/dist/package-test.js +2 -68651
- package/dist/package-test.js.map +1 -0
- package/dist/package.js +2 -66040
- package/dist/package.js.map +1 -0
- package/dockerfiles/Dockerfile +86 -0
- package/files/icons/composition-analysis.svg +17 -0
- package/files/icons/sequence-diversity-viewer.svg +4 -0
- package/files/icons/sequence-similarity-viewer.svg +4 -0
- package/files/icons/vdregions-viewer.svg +22 -0
- package/files/icons/weblogo-viewer.svg +7 -0
- package/files/tests/testUrl.csv +11 -0
- package/files/tests/toAtomicLevelTest.csv +4 -0
- package/package.json +29 -32
- package/src/analysis/sequence-activity-cliffs.ts +15 -13
- package/src/analysis/sequence-diversity-viewer.ts +3 -2
- package/src/analysis/sequence-search-base-viewer.ts +4 -2
- package/src/analysis/sequence-similarity-viewer.ts +4 -4
- package/src/analysis/sequence-space.ts +2 -1
- package/src/calculations/monomerLevelMols.ts +6 -6
- package/src/package-test.ts +9 -2
- package/src/package.ts +230 -145
- package/src/substructure-search/substructure-search.ts +25 -22
- package/src/tests/Palettes-test.ts +9 -9
- package/src/tests/WebLogo-positions-test.ts +131 -68
- package/src/tests/_first-tests.ts +9 -0
- package/src/tests/activity-cliffs-tests.ts +8 -7
- package/src/tests/activity-cliffs-utils.ts +17 -9
- package/src/tests/bio-tests.ts +30 -21
- package/src/tests/checkInputColumn-tests.ts +17 -17
- package/src/tests/converters-test.ts +81 -46
- package/src/tests/detectors-benchmark-tests.ts +17 -17
- package/src/tests/detectors-tests.ts +190 -178
- package/src/tests/fasta-export-tests.ts +2 -3
- package/src/tests/monomer-libraries-tests.ts +34 -0
- package/src/tests/pepsea-tests.ts +21 -0
- package/src/tests/renderers-test.ts +33 -29
- package/src/tests/sequence-space-test.ts +6 -4
- package/src/tests/similarity-diversity-tests.ts +4 -4
- package/src/tests/splitters-test.ts +6 -7
- package/src/tests/substructure-filters-tests.ts +23 -1
- package/src/tests/utils/sequences-generators.ts +7 -7
- package/src/tests/utils.ts +2 -1
- package/src/tests/viewers.ts +16 -0
- package/src/utils/cell-renderer.ts +116 -54
- package/src/utils/constants.ts +7 -6
- package/src/utils/convert.ts +17 -11
- package/src/utils/monomer-lib.ts +174 -0
- package/src/utils/multiple-sequence-alignment.ts +49 -26
- package/src/utils/pepsea.ts +78 -0
- package/src/utils/save-as-fasta.ts +9 -8
- package/src/utils/ui-utils.ts +15 -3
- package/src/viewers/vd-regions-viewer.ts +125 -83
- package/src/viewers/web-logo-viewer.ts +1031 -0
- package/src/widgets/bio-substructure-filter.ts +38 -24
- package/tsconfig.json +71 -72
- package/webpack.config.js +4 -11
- package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -8988
- package/jest.config.js +0 -33
- package/src/__jest__/remote.test.ts +0 -77
- package/src/__jest__/test-node.ts +0 -98
- package/test-Bio-91c83d8913ff-bb573307.html +0 -392
|
@@ -3,11 +3,32 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
5
|
import {printLeftOrCentered, DrawStyle} from '@datagrok-libraries/bio/src/utils/cell-renderer';
|
|
6
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
7
6
|
import * as C from './constants';
|
|
7
|
+
import {
|
|
8
|
+
ALIGNMENT,
|
|
9
|
+
getPaletteByType,
|
|
10
|
+
getSplitter,
|
|
11
|
+
monomerToShort,
|
|
12
|
+
NOTATION,
|
|
13
|
+
SplitterFunc,
|
|
14
|
+
TAGS as bioTAGS,
|
|
15
|
+
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
16
|
+
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
17
|
+
import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
|
|
18
|
+
|
|
19
|
+
/**
 * Keys under which this module keeps values in a column's `temp` dictionary
 * (accessed in this file as `gridCell.cell.column.temp`).
 *
 * - `referenceSequence` / `currentWord`: read by the sequence renderer to pick the sequence passed to
 *   the splitter as the reference; `currentWord` is written with the clicked cell value in `onClick`.
 * - `monomerWidth`: read by the sequence renderer; treated as 'short' when not set.
 * - `bioSumMaxLengthWords`, `bioMaxIndex`, `bioMaxLengthWords`: per-position width data written by the
 *   sequence renderer and read back in `onMouseMove`.
 */
const enum tempTAGS {
|
|
20
|
+
referenceSequence = 'reference-sequence',
|
|
21
|
+
currentWord = 'current-word',
|
|
22
|
+
monomerWidth = 'monomer-width',
|
|
23
|
+
bioSumMaxLengthWords = 'bio-sum-maxLengthWords',
|
|
24
|
+
bioMaxIndex = 'bio-maxIndex',
|
|
25
|
+
bioMaxLengthWords = 'bio-maxLengthWords',
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/** Loosely typed view of a column's `temp` dictionary: tag name to an arbitrary value. */
type TempType = { [tagName: string]: any };
|
|
8
29
|
|
|
9
30
|
const undefinedColor = 'rgb(100,100,100)';
|
|
10
|
-
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string =
|
|
31
|
+
const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = monomerToShort;
|
|
11
32
|
|
|
12
33
|
function getUpdatedWidth(grid: DG.Grid | null, g: CanvasRenderingContext2D, x: number, w: number): number {
|
|
13
34
|
return grid ? Math.min(grid.canvas.width - x, w) : g.canvas.width - x;
|
|
@@ -41,16 +62,18 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
41
62
|
get defaultWidth(): number { return 230; }
|
|
42
63
|
|
|
43
64
|
onClick(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
44
|
-
gridCell.cell.column.temp
|
|
65
|
+
const colTemp: TempType = gridCell.cell.column.temp;
|
|
66
|
+
colTemp[tempTAGS.currentWord] = gridCell.cell.value;
|
|
45
67
|
gridCell.grid.invalidate();
|
|
46
68
|
}
|
|
47
69
|
|
|
48
70
|
onMouseMove(gridCell: DG.GridCell, e: MouseEvent): void {
|
|
49
|
-
if (gridCell.cell.column.getTag(
|
|
71
|
+
if (gridCell.cell.column.getTag(bioTAGS.aligned) !== ALIGNMENT.SEQ_MSA)
|
|
50
72
|
return;
|
|
51
73
|
|
|
52
|
-
const
|
|
53
|
-
const
|
|
74
|
+
const colTemp: TempType = gridCell.cell.column.temp;
|
|
75
|
+
const maxLengthWordsSum = colTemp[tempTAGS.bioSumMaxLengthWords];
|
|
76
|
+
const maxIndex = colTemp[tempTAGS.bioMaxIndex];
|
|
54
77
|
const argsX = e.offsetX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
|
|
55
78
|
let left = 0;
|
|
56
79
|
let right = maxIndex;
|
|
@@ -74,7 +97,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
74
97
|
}
|
|
75
98
|
left = (argsX >= maxLengthWordsSum[left]) ? left + 1 : left;
|
|
76
99
|
const separator = gridCell.cell.column.getTag('separator') ?? '';
|
|
77
|
-
const splitterFunc:
|
|
100
|
+
const splitterFunc: SplitterFunc = getSplitter('separator', separator);
|
|
78
101
|
const subParts: string[] = splitterFunc(gridCell.cell.value);
|
|
79
102
|
(((subParts[left]?.length ?? 0) > 0)) ?
|
|
80
103
|
ui.tooltip.show(ui.div(subParts[left]), e.x + 16, e.y + 16) : ui.tooltip.hide();
|
|
@@ -95,10 +118,10 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
95
118
|
render(
|
|
96
119
|
g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell,
|
|
97
120
|
cellStyle: DG.GridCellStyle
|
|
98
|
-
)
|
|
121
|
+
) {
|
|
99
122
|
const grid = gridCell.gridRow !== -1 ? gridCell.grid : null;
|
|
100
123
|
const cell = gridCell.cell;
|
|
101
|
-
const paletteType = gridCell.cell.column.getTag(
|
|
124
|
+
const paletteType = gridCell.cell.column.getTag(bioTAGS.alphabet);
|
|
102
125
|
const minDistanceRenderer = 50;
|
|
103
126
|
w = getUpdatedWidth(grid, g, x, w);
|
|
104
127
|
g.save();
|
|
@@ -110,19 +133,28 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
110
133
|
|
|
111
134
|
//TODO: can this be replaced/merged with splitSequence?
|
|
112
135
|
const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
|
|
136
|
+
const aligned: string = gridCell.cell.column.getTag(bioTAGS.aligned);
|
|
113
137
|
|
|
114
|
-
const palette =
|
|
138
|
+
const palette = getPaletteByType(paletteType);
|
|
115
139
|
|
|
116
|
-
const separator = gridCell.cell.column.getTag(
|
|
117
|
-
const splitLimit =
|
|
118
|
-
const splitterFunc:
|
|
119
|
-
const referenceSequence: string[] = splitterFunc(((gridCell.cell.column?.temp['reference-sequence'] != null) && (gridCell.cell.column?.temp['reference-sequence'] != ''))
|
|
120
|
-
? gridCell.cell.column.temp['reference-sequence'] : gridCell.cell.column.temp['current-word'] ?? '');
|
|
121
|
-
const monomerWidth = (gridCell.cell.column?.temp['monomer-width'] != null) ? gridCell.cell.column.temp['monomer-width'] : 'short';
|
|
122
|
-
let gapRenderer = 5;
|
|
140
|
+
const separator = gridCell.cell.column.getTag(bioTAGS.separator) ?? '';
|
|
141
|
+
const splitLimit = w / 5;
|
|
142
|
+
const splitterFunc: SplitterFunc = getSplitter(units, separator, splitLimit);
|
|
123
143
|
|
|
124
|
-
|
|
144
|
+
// TODO: Store temp data to GridColumn
|
|
145
|
+
// Now the renderer requires data frame table Column underlying GridColumn
|
|
146
|
+
const colTemp: TempType = gridCell.cell.column.temp;
|
|
147
|
+
|
|
148
|
+
const tempReferenceSequence: string | null = colTemp[tempTAGS.referenceSequence];
|
|
149
|
+
const tempCurrentWord: string | null = colTemp[tempTAGS.currentWord];
|
|
150
|
+
const tempMonomerWidth: string | null = colTemp[tempTAGS.monomerWidth];
|
|
151
|
+
const referenceSequence: string[] = splitterFunc(
|
|
152
|
+
((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
|
|
153
|
+
tempReferenceSequence : tempCurrentWord ?? '');
|
|
154
|
+
const monomerWidth: string = (tempMonomerWidth != null) ? tempMonomerWidth : 'short';
|
|
125
155
|
|
|
156
|
+
let gapRenderer = 5;
|
|
157
|
+
let maxIndex = 0;
|
|
126
158
|
let maxLengthOfMonomer = 8;
|
|
127
159
|
|
|
128
160
|
if (monomerWidth === 'short') {
|
|
@@ -140,44 +172,42 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
140
172
|
const textSize = monomerToShortFunction(amino, maxLengthOfMonomer).length * 7 + gapRenderer;
|
|
141
173
|
if (textSize > (maxLengthWords[index] ?? 0))
|
|
142
174
|
maxLengthWords[index] = textSize;
|
|
143
|
-
if (index > maxIndex)
|
|
144
|
-
maxIndex = index;
|
|
145
|
-
}
|
|
175
|
+
if (index > maxIndex) maxIndex = index;
|
|
146
176
|
});
|
|
147
177
|
samples += 1;
|
|
148
178
|
}
|
|
149
|
-
|
|
179
|
+
const minLength = 3 * 7;
|
|
150
180
|
for (let i = 0; i <= maxIndex; i++) {
|
|
151
|
-
if (maxLengthWords[i] < minLength)
|
|
152
|
-
maxLengthWords[i] = minLength;
|
|
153
|
-
}
|
|
181
|
+
if (maxLengthWords[i] < minLength) maxLengthWords[i] = minLength;
|
|
154
182
|
const maxLengthWordSum: any = {};
|
|
155
183
|
maxLengthWordSum[0] = maxLengthWords[0];
|
|
156
|
-
for (let i = 1; i <= maxIndex; i++)
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
gridCell.cell.column.temp['bio-maxIndex'] = maxIndex;
|
|
161
|
-
gridCell.cell.column.temp['bio-maxLengthWords'] = maxLengthWords;
|
|
184
|
+
for (let i = 1; i <= maxIndex; i++) maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
|
|
185
|
+
colTemp[tempTAGS.bioSumMaxLengthWords] = maxLengthWordSum;
|
|
186
|
+
colTemp[tempTAGS.bioMaxIndex] = maxIndex;
|
|
187
|
+
colTemp[tempTAGS.bioMaxLengthWords] = maxLengthWords;
|
|
162
188
|
gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
|
|
163
189
|
}
|
|
164
190
|
} else {
|
|
165
|
-
maxLengthWords =
|
|
191
|
+
maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];
|
|
166
192
|
}
|
|
167
193
|
|
|
168
194
|
const subParts: string[] = splitterFunc(cell.value);
|
|
169
195
|
let x1 = x;
|
|
170
196
|
let color = undefinedColor;
|
|
171
|
-
let drawStyle =
|
|
172
|
-
|
|
173
|
-
|
|
197
|
+
let drawStyle = DrawStyle.classic;
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
if (aligned && aligned.includes('MSA') && units == NOTATION.SEPARATOR)
|
|
201
|
+
drawStyle = DrawStyle.MSA;
|
|
174
202
|
|
|
175
203
|
subParts.every((amino, index) => {
|
|
176
204
|
color = palette.get(amino);
|
|
177
205
|
g.fillStyle = undefinedColor;
|
|
178
|
-
|
|
179
|
-
x1 =
|
|
180
|
-
|
|
206
|
+
const last = index === subParts.length - 1;
|
|
207
|
+
x1 = printLeftOrCentered(x1, y, w, h,
|
|
208
|
+
g, amino, color, 0, true, 1.0, separator, last, drawStyle,
|
|
209
|
+
maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
|
|
210
|
+
return minDistanceRenderer <= w;
|
|
181
211
|
});
|
|
182
212
|
|
|
183
213
|
g.restore();
|
|
@@ -212,14 +242,14 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
|
|
|
212
242
|
g.textBaseline = 'middle';
|
|
213
243
|
g.textAlign = 'center';
|
|
214
244
|
|
|
215
|
-
const palette =
|
|
245
|
+
const palette = getPaletteByType(gridCell.cell.column.getTag(bioTAGS.alphabet));
|
|
216
246
|
const s: string = gridCell.cell.value;
|
|
217
247
|
if (!s)
|
|
218
248
|
return;
|
|
219
249
|
const color = palette.get(s);
|
|
220
250
|
|
|
221
251
|
g.fillStyle = color;
|
|
222
|
-
g.fillText(s, x + (w / 2), y + (h / 2), w);
|
|
252
|
+
g.fillText(monomerToShort(s, 3), x + (w / 2), y + (h / 2), w);
|
|
223
253
|
}
|
|
224
254
|
}
|
|
225
255
|
|
|
@@ -250,12 +280,12 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
|
|
|
250
280
|
const grid = gridCell.grid;
|
|
251
281
|
const cell = gridCell.cell;
|
|
252
282
|
const s: string = cell.value ?? '';
|
|
253
|
-
const separator = gridCell.tableColumn!.tags[
|
|
283
|
+
const separator = gridCell.tableColumn!.tags[bioTAGS.separator];
|
|
254
284
|
const units: string = gridCell.tableColumn!.tags[DG.TAGS.UNITS];
|
|
255
285
|
w = getUpdatedWidth(grid, g, x, w);
|
|
256
286
|
//TODO: can this be replaced/merged with splitSequence?
|
|
257
287
|
const [s1, s2] = s.split('#');
|
|
258
|
-
const splitter =
|
|
288
|
+
const splitter = getSplitter(units, separator);
|
|
259
289
|
const subParts1 = splitter(s1);
|
|
260
290
|
const subParts2 = splitter(s2);
|
|
261
291
|
drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
|
|
@@ -272,12 +302,12 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
272
302
|
subParts2: string [],
|
|
273
303
|
units: string,
|
|
274
304
|
fullStringLength?: boolean,
|
|
275
|
-
molDifferences?: { [key: number]: HTMLCanvasElement }
|
|
276
|
-
|
|
305
|
+
molDifferences?: { [key: number]: HTMLCanvasElement }
|
|
306
|
+
): void {
|
|
277
307
|
if (subParts1.length !== subParts2.length) {
|
|
278
|
-
const
|
|
279
|
-
subParts1
|
|
280
|
-
|
|
308
|
+
const sequences: IComparedSequences = fillShorterSequence(subParts1, subParts2);
|
|
309
|
+
subParts1 = sequences.subParts1;
|
|
310
|
+
subParts2 = sequences.subParts2;
|
|
281
311
|
}
|
|
282
312
|
const textSize1 = g.measureText(processSequence(subParts1).join(''));
|
|
283
313
|
const textSize2 = g.measureText(processSequence(subParts2).join(''));
|
|
@@ -297,9 +327,9 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
297
327
|
g.font = '12px monospace';
|
|
298
328
|
g.textBaseline = 'top';
|
|
299
329
|
|
|
300
|
-
let palette:
|
|
330
|
+
let palette: SeqPalette = UnknownSeqPalettes.Color;
|
|
301
331
|
if (units != 'HELM')
|
|
302
|
-
palette =
|
|
332
|
+
palette = getPaletteByType(units.substring(units.length - 2));
|
|
303
333
|
|
|
304
334
|
const vShift = 7;
|
|
305
335
|
for (let i = 0; i < subParts1.length; i++) {
|
|
@@ -309,17 +339,22 @@ export function drawMoleculeDifferenceOnCanvas(
|
|
|
309
339
|
|
|
310
340
|
if (amino1 != amino2) {
|
|
311
341
|
const color2 = palette.get(amino2);
|
|
312
|
-
const subX0 =
|
|
313
|
-
const subX1 =
|
|
342
|
+
const subX0 = printLeftOrCentered(updatedX, updatedY - vShift, w, h, g, amino1, color1, 0, true);
|
|
343
|
+
const subX1 = printLeftOrCentered(updatedX, updatedY + vShift, w, h, g, amino2, color2, 0, true);
|
|
314
344
|
updatedX = Math.max(subX1, subX0);
|
|
315
345
|
if (molDifferences)
|
|
316
346
|
molDifferences[i] = createDifferenceCanvas(amino1, amino2, color1, color2, updatedY, vShift, h);
|
|
317
|
-
} else { updatedX =
|
|
347
|
+
} else { updatedX = printLeftOrCentered(updatedX, updatedY, w, h, g, amino1, color1, 0, true, 0.5); }
|
|
318
348
|
updatedX += 4;
|
|
319
349
|
}
|
|
320
350
|
g.restore();
|
|
321
351
|
}
|
|
322
352
|
|
|
353
|
+
/** Pair of monomer arrays being compared; this is the value returned by `fillShorterSequence`. */
interface IComparedSequences{
|
|
354
|
+
subParts1: string[];
|
|
355
|
+
subParts2: string[];
|
|
356
|
+
}
|
|
357
|
+
|
|
323
358
|
function createDifferenceCanvas(
|
|
324
359
|
amino1: string,
|
|
325
360
|
amino2: string,
|
|
@@ -338,7 +373,34 @@ function createDifferenceCanvas(
|
|
|
338
373
|
canvas.width = width + 4;
|
|
339
374
|
context.font = '12px monospace';
|
|
340
375
|
context.textBaseline = 'top';
|
|
341
|
-
|
|
342
|
-
|
|
376
|
+
printLeftOrCentered(0, y - shift, width, h, context, amino1, color1, 0, true);
|
|
377
|
+
printLeftOrCentered(0, y + shift, width, h, context, amino2, color2, 0, true);
|
|
343
378
|
return canvas;
|
|
344
379
|
}
|
|
380
|
+
|
|
381
|
+
/**
 * Pads the shorter of two monomer sequences with empty monomers (`''`) so that both
 * sequences end up with the same length.
 *
 * Two placements of the shorter sequence against the longer one are scored by the number of
 * positions holding identical monomers: aligned to the start and aligned to the end.
 * The padding is appended when the start alignment scores strictly higher; otherwise
 * (including ties) it is prepended.
 *
 * Both alignments compare every monomer of the shorter sequence. The previous implementation
 * stopped the end-alignment scan one position early, so the first monomer of the shorter
 * sequence was never counted for that alignment.
 *
 * The input arrays are not modified; the padded sequence is returned as a new array.
 *
 * @param {string[]} subParts1 Monomers of the first sequence
 * @param {string[]} subParts2 Monomers of the second sequence
 * @return {IComparedSequences} Both sequences, in the original order, with the shorter one padded
 */
function fillShorterSequence(subParts1: string[], subParts2: string[]): IComparedSequences {
  const firstIsLonger = subParts1.length > subParts2.length;
  const longerSeq = firstIsLonger ? subParts1 : subParts2;
  const shorterSeq = firstIsLonger ? subParts2 : subParts1;
  const lengthDiff = longerSeq.length - shorterSeq.length;

  let numIdenticalStart = 0;
  let numIdenticalEnd = 0;
  for (let i = 0; i < shorterSeq.length; i++) {
    // start alignment: shorterSeq[i] is placed at position i of the longer sequence
    if (longerSeq[i] === shorterSeq[i]) numIdenticalStart++;
    // end alignment: shorterSeq[i] is placed at position i + lengthDiff of the longer sequence
    if (longerSeq[i + lengthDiff] === shorterSeq[i]) numIdenticalEnd++;
  }

  const emptyMonomersArray = new Array<string>(lengthDiff).fill('');
  const padded: string[] = numIdenticalStart > numIdenticalEnd ?
    shorterSeq.concat(emptyMonomersArray) : emptyMonomersArray.concat(shorterSeq);

  return firstIsLonger ?
    {subParts1: subParts1, subParts2: padded} :
    {subParts1: padded, subParts2: subParts2};
}
|
package/src/utils/constants.ts
CHANGED
|
@@ -12,18 +12,14 @@ export enum COLUMNS_NAMES {
|
|
|
12
12
|
export enum TAGS {
|
|
13
13
|
AAR = 'AAR',
|
|
14
14
|
POSITION = 'Pos',
|
|
15
|
-
SEPARATOR = 'separator',
|
|
16
15
|
SELECTION = 'selection',
|
|
17
|
-
ALPHABET = 'alphabet',
|
|
18
|
-
ALIGNED = 'aligned',
|
|
19
16
|
}
|
|
20
17
|
|
|
21
18
|
export enum SEM_TYPES {
|
|
22
19
|
MONOMER = 'Monomer',
|
|
23
20
|
MACROMOLECULE_DIFFERENCE = 'MacromoleculeDifference',
|
|
24
21
|
ACTIVITY = 'activity',
|
|
25
|
-
ACTIVITY_SCALED = 'activityScaled'
|
|
26
|
-
MACROMOLECULE = 'Macromolecule',
|
|
22
|
+
ACTIVITY_SCALED = 'activityScaled'
|
|
27
23
|
}
|
|
28
24
|
|
|
29
25
|
export const MSA = 'MSA';
|
|
@@ -47,7 +43,7 @@ export const aarGroups = {
|
|
|
47
43
|
'-': '-',
|
|
48
44
|
};
|
|
49
45
|
|
|
50
|
-
export const groupDescription: {[key: string]: {'description': string, aminoAcids: string[]}} = {
|
|
46
|
+
export const groupDescription: { [key: string]: { 'description': string, aminoAcids: string[] } } = {
|
|
51
47
|
'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
|
|
52
48
|
'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
|
|
53
49
|
'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
|
|
@@ -58,3 +54,8 @@ export const groupDescription: {[key: string]: {'description': string, aminoAcid
|
|
|
58
54
|
},
|
|
59
55
|
'-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
|
|
60
56
|
};
|
|
57
|
+
|
|
58
|
+
/** Constants grouped under the PEPSEA name. NOTE(review): presumably used by the PepSeA integration (`pepsea.ts`) - confirm. */
export namespace PEPSEA {
|
|
59
|
+
export const SEPARATOR = '.';
|
|
60
|
+
}
|
|
61
|
+
|
package/src/utils/convert.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as grok from 'datagrok-api/grok';
|
|
4
|
-
import * as bio from '@datagrok-libraries/bio';
|
|
5
4
|
|
|
6
5
|
import $ from 'cash-dom';
|
|
7
6
|
import {Subscription} from 'rxjs';
|
|
7
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
let convertDialog: DG.Dialog | null = null;
|
|
@@ -16,13 +17,13 @@ let convertDialogSubs: Subscription[] = [];
|
|
|
16
17
|
* @param {DG.column} col Column with 'Macromolecule' semantic type
|
|
17
18
|
*/
|
|
18
19
|
export function convert(col: DG.Column): void {
|
|
19
|
-
const converter = new
|
|
20
|
-
const currentNotation:
|
|
20
|
+
const converter = new NotationConverter(col);
|
|
21
|
+
const currentNotation: NOTATION = converter.notation;
|
|
21
22
|
//TODO: read all notations
|
|
22
23
|
const notations = [
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
NOTATION.FASTA,
|
|
25
|
+
NOTATION.SEPARATOR,
|
|
26
|
+
NOTATION.HELM
|
|
26
27
|
];
|
|
27
28
|
const separatorArray = ['-', '.', '/'];
|
|
28
29
|
const filteredNotations = notations.filter((e) => e !== currentNotation);
|
|
@@ -32,7 +33,7 @@ export function convert(col: DG.Column): void {
|
|
|
32
33
|
|
|
33
34
|
// hide the separator input for non-SEPARATOR target notations
|
|
34
35
|
const toggleSeparator = () => {
|
|
35
|
-
if (targetNotationInput.value !==
|
|
36
|
+
if (targetNotationInput.value !== NOTATION.SEPARATOR)
|
|
36
37
|
$(separatorInput.root).hide();
|
|
37
38
|
else
|
|
38
39
|
$(separatorInput.root).show();
|
|
@@ -46,7 +47,7 @@ export function convert(col: DG.Column): void {
|
|
|
46
47
|
});
|
|
47
48
|
|
|
48
49
|
if (convertDialog == null) {
|
|
49
|
-
convertDialog = ui.dialog('Convert
|
|
50
|
+
convertDialog = ui.dialog('Convert Sequence Notation')
|
|
50
51
|
.add(ui.div([
|
|
51
52
|
ui.divText(
|
|
52
53
|
'Current notation: ' + currentNotation,
|
|
@@ -63,7 +64,7 @@ export function convert(col: DG.Column): void {
|
|
|
63
64
|
separatorInput.root
|
|
64
65
|
]))
|
|
65
66
|
.onOK(async () => {
|
|
66
|
-
const targetNotation = targetNotationInput.value as
|
|
67
|
+
const targetNotation = targetNotationInput.value as NOTATION;
|
|
67
68
|
const separator: string | null = separatorInput.value;
|
|
68
69
|
|
|
69
70
|
await convertDo(col, targetNotation, separator);
|
|
@@ -80,12 +81,17 @@ export function convert(col: DG.Column): void {
|
|
|
80
81
|
|
|
81
82
|
/** Creates a new column with converted sequences and detects its semantic type */
|
|
82
83
|
export async function convertDo(
|
|
83
|
-
srcCol: DG.Column, targetNotation:
|
|
84
|
+
srcCol: DG.Column, targetNotation: NOTATION, separator: string | null
|
|
84
85
|
): Promise<DG.Column> {
|
|
85
|
-
const converter = new
|
|
86
|
+
const converter = new NotationConverter(srcCol);
|
|
86
87
|
const newColumn = converter.convert(targetNotation, separator);
|
|
87
88
|
srcCol.dataFrame.columns.add(newColumn);
|
|
88
89
|
|
|
90
|
+
// Call detector directly to escape some error on detectSemanticTypes
|
|
91
|
+
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
|
|
92
|
+
if (semType)
|
|
93
|
+
newColumn.semType = semType;
|
|
94
|
+
|
|
89
95
|
// call to calculate 'cell.renderer' tag
|
|
90
96
|
await grok.data.detectSemanticTypes(srcCol.dataFrame);
|
|
91
97
|
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
// import * as ui from 'datagrok-api/ui';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
5
|
+
import {Observable, Subject} from 'rxjs';
|
|
6
|
+
import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/index';
|
|
7
|
+
import {
|
|
8
|
+
createJsonMonomerLibFromSdf,
|
|
9
|
+
expectedMonomerData,
|
|
10
|
+
IMonomerLibHelper
|
|
11
|
+
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
12
|
+
|
|
13
|
+
// -- Monomer libraries --
|
|
14
|
+
export const LIB_STORAGE_NAME = 'Libraries';
|
|
15
|
+
export const LIB_PATH = 'System:AppData/Bio/libraries/';
|
|
16
|
+
export const LIB_DEFAULT: { [fileName: string]: string } = {'HELMCoreLibrary.json': 'HELMCoreLibrary.json'};
|
|
17
|
+
|
|
18
|
+
/**
 * In-memory monomer library: monomers grouped by type and keyed by monomer name/symbol.
 * Emits on {@link onChanged} after {@link update}, {@link updateLibs} and {@link clear}.
 */
export class MonomerLib implements IMonomerLib {
  private _monomers: { [type: string]: { [name: string]: Monomer } } = {};
  private _onChanged = new Subject<any>();

  /**
   * @param {Object} monomers Initial content, type -> (name -> monomer). The object is used as is (not copied).
   */
  constructor(monomers: { [type: string]: { [name: string]: Monomer } }) {
    this._monomers = monomers;
  }

  /** Own-property check; unlike the `in` operator it does not match keys inherited from Object.prototype. */
  private static hasOwn(obj: object, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(obj, key);
  }

  /**
   * @param {string} monomerType Monomer type the library is grouped by
   * @param {string} monomerName Monomer name/symbol within the type
   * @return {Monomer | null} The monomer, or null if the type or the name is not in the library
   */
  getMonomer(monomerType: string, monomerName: string): Monomer | null {
    if (!MonomerLib.hasOwn(this._monomers, monomerType))
      return null;
    const monomersOfType = this._monomers[monomerType];
    return MonomerLib.hasOwn(monomersOfType, monomerName) ? monomersOfType[monomerName] : null;
  }

  /** @return {string[]} Monomer types present in the library */
  getTypes(): string[] {
    return Object.keys(this._monomers);
  }

  /**
   * @param {string} type Monomer type
   * @return {Object} Molfile of each monomer of the type, keyed by monomer symbol; empty for an unknown type
   */
  getMonomerMolsByType(type: string): { [symbol: string]: string } {
    const res: { [symbol: string]: string } = {};
    if (!MonomerLib.hasOwn(this._monomers, type))
      return res;

    for (const [monomerSymbol, monomer] of Object.entries(this._monomers[type]))
      res[monomerSymbol] = monomer.molfile;

    return res;
  }

  /**
   * @param {string} type Monomer type
   * @return {string[]} Names of the monomers of the type; empty for an unknown type
   */
  getMonomerNamesByType(type: string): string[] {
    return MonomerLib.hasOwn(this._monomers, type) ? Object.keys(this._monomers[type]) : [];
  }

  /** Emits every time the content of the library has been changed through this class. */
  get onChanged(): Observable<any> {
    return this._onChanged;
  }

  /** Merges monomers of {@link lib} into this library without notifying subscribers. */
  private _updateInt(lib: IMonomerLib): void {
    for (const type of lib.getTypes()) {
      //could possibly rewrite -> TODO: check duplicated monomer symbol
      if (!MonomerLib.hasOwn(this._monomers, type))
        this._monomers[type] = {};

      for (const monomerName of lib.getMonomerNamesByType(type)) {
        const monomer = lib.getMonomer(type, monomerName);
        // Do not store null for a name the source library lists but cannot resolve.
        if (monomer !== null)
          this._monomers[type][monomerName] = monomer;
      }
    }
  }

  /** Merges monomers of {@link lib} into this library (same type and name is overwritten) and notifies. */
  public update(lib: IMonomerLib): void {
    this._updateInt(lib);
    this._onChanged.next();
  }

  /**
   * Merges several libraries in the order given and notifies once.
   * @param {IMonomerLib[]} libList Libraries to merge
   * @param {boolean} reload Drop the current content before merging [false]
   */
  public updateLibs(libList: IMonomerLib[], reload: boolean = false): void {
    if (reload) this._monomers = {};
    for (const lib of libList) this._updateInt(lib);
    this._onChanged.next();
  }

  /** Removes all monomers and notifies. */
  public clear(): void {
    this._monomers = {};
    this._onChanged.next();
  }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Singleton that owns the shared monomer library and fills it from library files.
 * Obtain it through {@link MonomerLibHelper.instance}.
 */
export class MonomerLibHelper implements IMonomerLibHelper {
  private readonly _monomerLib: MonomerLib = new MonomerLib({});

  /** Protect constructor to prevent multiple instantiation. */
  protected constructor() {}

  /** Singleton monomer library */
  getBioLib(): IMonomerLib {
    return this._monomerLib;
  }

  /** Tail of the queue of load requests; every new load is chained after it. */
  private loadLibrariesPromise: Promise<void> = Promise.resolve();

  /** Loads libraries based on settings in user storage {@link LIB_STORAGE_NAME}
   * Loads are queued: a load starts only after the previously requested one has settled.
   * A failed load rejects the promise returned to its own caller only and does not
   * prevent the loads requested afterwards from running.
   * @param {boolean} reload Clean {@link monomerLib} before load libraries [false]
   */
  async loadLibraries(reload: boolean = false): Promise<void> {
    const load = async (): Promise<void> => {
      const settings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
      // Nothing stored yet is treated as "no libraries selected".
      const userLibrariesSettings: string[] = Object.keys(settings ?? {});
      const libs: IMonomerLib[] = await Promise.all(userLibrariesSettings.map((libFileName) => {
        //TODO handle whether files are in place
        return this.readLibrary(LIB_PATH, libFileName);
      }));
      this._monomerLib.updateLibs(libs, reload);
    };
    // `load` is passed as both handlers so it runs whether the previous load resolved or
    // rejected; chaining with a fulfilment handler only would leave the queue rejected forever.
    return this.loadLibrariesPromise = this.loadLibrariesPromise.then(load, load);
  }

  /** Reads library from file shares, handles .json and .sdf
   * A file name ending with '.sdf' is imported through the 'Chem:importSdf' function; when that
   * function is not found a warning is shown and an empty library is returned.
   * Any other file is read as text and parsed as JSON.
   * @param {string} path Location of the library files
   * @param {string} fileName Name of the library file within {@link path}
   * @return {Promise<IMonomerLib>} Library with the monomers of the file grouped by 'polymerType'
   */
  async readLibrary(path: string, fileName: string): Promise<IMonomerLib> {
    let data: any[] = [];
    const fileSource = new DG.FileSource(path);
    if (fileName.endsWith('.sdf')) {
      const funcList: DG.Func[] = DG.Func.find({package: 'Chem', name: 'importSdf'});
      if (funcList.length === 1) {
        const bytes = await fileSource.readAsBytes(fileName);
        const dfSdf = await grok.functions.call('Chem:importSdf', {bytes: bytes});
        data = createJsonMonomerLibFromSdf(dfSdf[0]);
      } else {
        grok.shell.warning('Chem package is not installed');
      }
    } else {
      const text = await fileSource.readAsText(fileName);
      data = JSON.parse(text);
    }

    if (!Array.isArray(data))
      throw new Error(`Monomer library '${fileName}' is expected to contain a list of monomers.`);

    const monomers: { [type: string]: { [name: string]: Monomer } } = {};
    //group monomers by their type
    for (const monomer of data) {
      const monomerAdd: Monomer = {
        'symbol': monomer['symbol'],
        'name': monomer['name'],
        'naturalAnalog': monomer['naturalAnalog'],
        'molfile': monomer['molfile'],
        'rgroups': monomer['rgroups'],
        'polymerType': monomer['polymerType'],
        'monomerType': monomer['monomerType'],
        'data': {}
      };

      // Properties outside of the expected set are preserved in 'data'.
      Object.keys(monomer).forEach((prop) => {
        if (!expectedMonomerData.includes(prop))
          monomerAdd.data[prop] = monomer[prop];
      });

      const polymerType: string = monomer['polymerType'];
      if (!Object.prototype.hasOwnProperty.call(monomers, polymerType))
        monomers[polymerType] = {};

      monomers[polymerType][monomer['symbol']] = monomerAdd;
    }

    return new MonomerLib(monomers);
  }

  // -- Instance singleton --
  private static _instance: MonomerLibHelper | null = null;

  /** The only instance of the helper, created on first access. */
  public static get instance(): MonomerLibHelper {
    if (!MonomerLibHelper._instance) MonomerLibHelper._instance = new MonomerLibHelper();
    return MonomerLibHelper._instance;
  }
}
|